aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/AArch64
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-07-21 00:45:20 -0700
committerStephen Hines <srhines@google.com>2014-07-25 00:48:57 -0700
commitcd81d94322a39503e4a3e87b6ee03d4fcb3465fb (patch)
tree81b7dd2bb4370a392f31d332a566c903b5744764 /lib/Target/AArch64
parent0c5f13c0c4499eaf42ab5e9e2ceabd4e20e36861 (diff)
downloadexternal_llvm-cd81d94322a39503e4a3e87b6ee03d4fcb3465fb.zip
external_llvm-cd81d94322a39503e4a3e87b6ee03d4fcb3465fb.tar.gz
external_llvm-cd81d94322a39503e4a3e87b6ee03d4fcb3465fb.tar.bz2
Update LLVM for rebase to r212749.
Includes a cherry-pick of: r212948 - fixes a small issue with atomic calls Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r--lib/Target/AArch64/AArch64.td3
-rw-r--r--lib/Target/AArch64/AArch64AddressTypePromotion.cpp21
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp2
-rw-r--r--lib/Target/AArch64/AArch64BranchRelaxation.cpp6
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.td14
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp51
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp7
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h11
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp10
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp279
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h5
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td65
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp48
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h4
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td89
-rw-r--r--lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp81
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp4
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.td2
-rw-r--r--lib/Target/AArch64/AArch64SchedA53.td4
-rw-r--r--lib/Target/AArch64/AArch64SchedA57.td304
-rw-r--r--lib/Target/AArch64/AArch64SchedA57WriteRes.td512
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.cpp11
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.h6
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp28
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h34
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp36
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.h27
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp64
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp461
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp3
-rw-r--r--lib/Target/AArch64/Disassembler/CMakeLists.txt8
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp8
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp33
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp40
-rw-r--r--lib/Target/AArch64/MCTargetDesc/Android.mk3
-rw-r--r--lib/Target/AArch64/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h20
42 files changed, 1780 insertions, 537 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 1ad5ac8..e6a27c3 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -60,6 +60,7 @@ def AArch64InstrInfo : InstrInfo;
// AArch64 Processors supported.
//
include "AArch64SchedA53.td"
+include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
@@ -89,7 +90,7 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureCRC]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
-def : ProcessorModel<"cortex-a57", NoSchedModel, [ProcA57]>;
+def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 04906f6..ab2c4b7 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -214,8 +214,8 @@ AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
if (SExt->getType() != ConsideredSExtType)
return false;
- for (const Use &U : SExt->uses()) {
- if (isa<GetElementPtrInst>(*U))
+ for (const User *U : SExt->users()) {
+ if (isa<GetElementPtrInst>(U))
return true;
}
@@ -267,8 +267,7 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
}
// Now try to get through the chain of definitions.
- while (isa<Instruction>(SExt->getOperand(0))) {
- Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0));
+ while (auto *Inst = dyn_cast<Instruction>(SExt->getOperand(0))) {
DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n');
if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) {
// We cannot get through something that is not an Instruction
@@ -285,10 +284,10 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
// assertion on the type as all involved sext operation may have not
// been moved yet.
while (!Inst->use_empty()) {
- Value::use_iterator UseIt = Inst->use_begin();
- Instruction *UseInst = dyn_cast<Instruction>(*UseIt);
- assert(UseInst && "Use of sext is not an Instruction!");
- UseInst->setOperand(UseIt->getOperandNo(), SExt);
+ Use &U = *Inst->use_begin();
+ Instruction *User = dyn_cast<Instruction>(U.getUser());
+ assert(User && "User of sext is not an Instruction!");
+ User->setOperand(U.getOperandNo(), SExt);
}
ToRemove.insert(Inst);
SExt->setOperand(0, Inst->getOperand(0));
@@ -385,11 +384,11 @@ void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
if (ToRemove.count(Inst))
continue;
bool inserted = false;
- for (auto Pt : CurPts) {
+ for (auto &Pt : CurPts) {
if (DT.dominates(Inst, Pt)) {
DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n"
<< *Inst << '\n');
- (Pt)->replaceAllUsesWith(Inst);
+ Pt->replaceAllUsesWith(Inst);
ToRemove.insert(Pt);
Pt = Inst;
inserted = true;
@@ -436,7 +435,7 @@ void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
bool insert = false;
// #1.
- for (const Use &U : SExt->uses()) {
+ for (const User *U : SExt->users()) {
const Instruction *Inst = dyn_cast<GetElementPtrInst>(U);
if (Inst && Inst->getNumOperands() > 2) {
DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index c3ee9bb..cd94e24 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -211,7 +211,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
default:
- assert(0 && "<unknown operand type>");
+ llvm_unreachable("<unknown operand type>");
case MachineOperand::MO_Register: {
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index 5209452..484e7e8 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -291,7 +291,7 @@ static bool isConditionalBranch(unsigned Opc) {
static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
- assert(0 && "unexpected opcode!");
+ llvm_unreachable("unexpected opcode!");
case AArch64::TBZW:
case AArch64::TBNZW:
case AArch64::TBZX:
@@ -309,7 +309,7 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
static unsigned getOppositeConditionOpcode(unsigned Opc) {
switch (Opc) {
default:
- assert(0 && "unexpected opcode!");
+ llvm_unreachable("unexpected opcode!");
case AArch64::TBNZW: return AArch64::TBZW;
case AArch64::TBNZX: return AArch64::TBZX;
case AArch64::TBZW: return AArch64::TBNZW;
@@ -325,7 +325,7 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) {
static unsigned getBranchDisplacementBits(unsigned Opc) {
switch (Opc) {
default:
- assert(0 && "unexpected opcode!");
+ llvm_unreachable("unexpected opcode!");
case AArch64::TBNZW:
case AArch64::TBZW:
case AArch64::TBNZX:
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index ded2e17..8e8bd3d 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -18,9 +18,6 @@ class CCIfAlign<string Align, CCAction A> :
class CCIfBigEndian<CCAction A> :
CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>;
-class CCIfUnallocated<string Reg, CCAction A> :
- CCIf<"!State.isAllocated(AArch64::" # Reg # ")", A>;
-
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
@@ -45,7 +42,7 @@ def CC_AArch64_AAPCS : CallingConv<[
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
@@ -120,7 +117,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
@@ -143,8 +140,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
- CCIfType<[i1, i8], CCAssignToStack<1, 1>>,
- CCIfType<[i16], CCAssignToStack<2, 2>>,
+ CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
+ CCIf<"ValVT == MVT::i16", CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
CCAssignToStack<8, 8>>,
@@ -172,12 +169,11 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
// 32bit quantity as undef.
def CC_AArch64_WebKit_JS : CallingConv<[
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X0", CCPromoteToType<i32>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
// Pass the remaining arguments on the stack instead.
- CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index c3b5369..2164d77 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -240,21 +240,15 @@ unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
}
unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
- // We can't handle thread-local variables quickly yet. Unfortunately we have
- // to peer through any aliases to find out if that rule applies.
- const GlobalValue *TLSGV = GV;
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- TLSGV = GA->getAliasee();
+ // We can't handle thread-local variables quickly yet.
+ if (GV->isThreadLocal())
+ return 0;
// MachO still uses GOT for large code-model accesses, but ELF requires
// movz/movk sequences, which FastISel doesn't handle yet.
if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
return 0;
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV))
- if (GVar->isThreadLocal())
- return 0;
-
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
EVT DestEVT = TLI.getValueType(GV->getType(), true);
@@ -469,11 +463,18 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
break;
}
- // FIXME: If this is a stack pointer and the offset needs to be simplified
- // then put the alloca address into a register, set the base type back to
- // register and continue. This should almost never happen.
+ //If this is a stack pointer and the offset needs to be simplified then put
+ // the alloca address into a register, set the base type back to register and
+ // continue. This should almost never happen.
if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
- return false;
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
+ ResultReg)
+ .addFrameIndex(Addr.getFI())
+ .addImm(0)
+ .addImm(0);
+ Addr.setKind(Address::RegBase);
+ Addr.setReg(ResultReg);
}
// Since the offset is too large for the load/store instruction get the
@@ -1224,7 +1225,6 @@ bool AArch64FastISel::ProcessCallArgs(
Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
if (Arg == 0)
return false;
- ArgVT = DestVT;
break;
}
case CCValAssign::AExt:
@@ -1235,7 +1235,6 @@ bool AArch64FastISel::ProcessCallArgs(
Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
if (Arg == 0)
return false;
- ArgVT = DestVT;
break;
}
default:
@@ -1254,7 +1253,7 @@ bool AArch64FastISel::ProcessCallArgs(
assert(VA.isMemLoc() && "Assuming store on stack.");
// Need to store on the stack.
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
unsigned BEAlign = 0;
if (ArgSize < 8 && !Subtarget->isLittleEndian())
@@ -1468,10 +1467,12 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
bool RV;
unsigned ResultReg;
RV = EmitLoad(VT, ResultReg, Src);
- assert(RV == true && "Should be able to handle this load.");
+ if (!RV)
+ return false;
+
RV = EmitStore(VT, ResultReg, Dest);
- assert(RV == true && "Should be able to handle this store.");
- (void)RV;
+ if (!RV)
+ return false;
int64_t Size = VT.getSizeInBits() / 8;
Len -= Size;
@@ -1749,6 +1750,17 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
+
+ // FastISel does not have plumbing to deal with extensions where the SrcVT or
+ // DestVT are odd things, so test to make sure that they are both types we can
+ // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
+ // bail out to SelectionDAG.
+ if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
+ (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
+ ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
+ (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
+ return 0;
+
unsigned Opc;
unsigned Imm = 0;
@@ -1895,6 +1907,7 @@ bool AArch64FastISel::SelectMul(const Instruction *I) {
case MVT::i32:
ZReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
+ SrcVT = MVT::i32;
break;
case MVT::i64:
ZReg = AArch64::XZR;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index deb306a..9c33717 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -158,7 +158,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const AArch64InstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
// Add callee saved registers to move list.
@@ -204,8 +204,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const AArch64RegisterInfo *RegInfo = TM.getRegisterInfo();
- const AArch64InstrInfo *TII = TM.getInstrInfo();
+ const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 0e00d16..7686e6f 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -18,18 +18,11 @@
namespace llvm {
-class AArch64Subtarget;
-class AArch64TargetMachine;
-
class AArch64FrameLowering : public TargetFrameLowering {
- const AArch64TargetMachine &TM;
-
public:
- explicit AArch64FrameLowering(const AArch64TargetMachine &TM,
- const AArch64Subtarget &STI)
+ explicit AArch64FrameLowering()
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
- false /*StackRealignable*/),
- TM(TM) {}
+ false /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7007ffc..3f49fab 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -153,9 +153,6 @@ public:
SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
- SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
- SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
-
SDNode *SelectBitfieldExtractOp(SDNode *N);
SDNode *SelectBitfieldInsertOp(SDNode *N);
@@ -596,8 +593,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
const GlobalValue *GV = GAN->getGlobal();
unsigned Alignment = GV->getAlignment();
const DataLayout *DL = TLI->getDataLayout();
- if (Alignment == 0 && !Subtarget->isTargetDarwin())
- Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
+ Type *Ty = GV->getType()->getElementType();
+ if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin())
+ Alignment = DL->getABITypeAlignment(Ty);
if (Alignment >= Size)
return true;
@@ -2111,7 +2109,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
.getVectorElementType()
.getSizeInBits()) {
default:
- assert(0 && "Unexpected vector element type!");
+ llvm_unreachable("Unexpected vector element type!");
case 64:
SubReg = AArch64::dsub;
break;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 80d6669..28d0035 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -67,15 +67,15 @@ EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
//===----------------------------------------------------------------------===//
// AArch64 Lowering public interface.
//===----------------------------------------------------------------------===//
-static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<AArch64Subtarget>().isTargetDarwin())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
return new AArch64_MachoTargetObjectFile();
return new AArch64_ELFTargetObjectFile();
}
-AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)) {
+AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
Subtarget = &TM.getSubtarget<AArch64Subtarget>();
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
@@ -627,7 +627,7 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
// FIXME: On AArch64, this depends on the type.
- // Basically, the addressable offsets are o to 4095 * Ty.getSizeInBytes().
+ // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
// and the offset has to be a multiple of the related size in bytes.
return 4095;
}
@@ -823,8 +823,7 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
#ifndef NDEBUG
MI->dump();
#endif
- assert(0 && "Unexpected instruction for custom inserter!");
- break;
+ llvm_unreachable("Unexpected instruction for custom inserter!");
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
@@ -833,7 +832,6 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
}
- llvm_unreachable("Unexpected instruction for custom inserter!");
}
//===----------------------------------------------------------------------===//
@@ -1273,7 +1271,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
bool ExtraOp = false;
switch (Op.getOpcode()) {
default:
- assert(0 && "Invalid code");
+ llvm_unreachable("Invalid code");
case ISD::ADDC:
Opc = AArch64ISD::ADDS;
break;
@@ -1387,24 +1385,22 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
- // FP_TO_XINT conversion from the same type are legal.
- if (VT.getSizeInBits() == InVT.getSizeInBits())
- return Op;
-
- if (InVT == MVT::v2f64 || InVT == MVT::v4f32) {
+ if (VT.getSizeInBits() < InVT.getSizeInBits()) {
SDLoc dl(Op);
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
- } else if (InVT == MVT::v2f32) {
+ }
+
+ if (VT.getSizeInBits() > InVT.getSizeInBits()) {
SDLoc dl(Op);
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
// Type changing conversions are illegal.
- return SDValue();
+ return Op;
}
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
@@ -1440,32 +1436,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
- // v2i32 to v2f32 is legal.
- if (VT == MVT::v2f32 && InVT == MVT::v2i32)
- return Op;
-
- // This function only handles v2f64 outputs.
- if (VT == MVT::v2f64) {
- // Extend the input argument to a v2i64 that we can feed into the
- // floating point conversion. Zero or sign extend based on whether
- // we're doing a signed or unsigned float conversion.
- unsigned Opc =
- Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
- assert(Op.getNumOperands() == 1 && "FP conversions take one argument");
- SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0));
- return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted);
+ if (VT.getSizeInBits() < InVT.getSizeInBits()) {
+ MVT CastVT =
+ MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
+ InVT.getVectorNumElements());
+ In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0));
}
- // Scalarize v2i64 to v2f32 conversions.
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
- SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In,
- DAG.getConstant(i, MVT::i64));
- Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr);
- BuildVectorOps.push_back(Sclr);
+ if (VT.getSizeInBits() > InVT.getSizeInBits()) {
+ unsigned CastOpc =
+ Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ EVT CastVT = VT.changeVectorElementTypeToInteger();
+ In = DAG.getNode(CastOpc, dl, CastVT, In);
+ return DAG.getNode(Op.getOpcode(), dl, VT, In);
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps);
+ return Op;
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
@@ -1516,7 +1503,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::Fast, RetTy, Callee, &Args, 0);
+ .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -1711,7 +1698,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
InVals.push_back(FrameIdxN);
continue;
- } if (VA.isRegLoc()) {
+ }
+
+ if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
@@ -1772,10 +1761,16 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
SDValue ArgValue;
+ // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ MVT MemVT = VA.getValVT();
+
switch (VA.getLocInfo()) {
default:
break;
+ case CCValAssign::BCvt:
+ MemVT = VA.getLocVT();
+ break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
@@ -1787,10 +1782,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
break;
}
- ArgValue = DAG.getExtLoad(ExtType, DL, VA.getValVT(), Chain, FIN,
+ ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- VA.getLocVT(),
- false, false, false, 0);
+ MemVT, false, false, false, nullptr);
InVals.push_back(ArgValue);
}
@@ -2339,11 +2333,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
// promoted to a legal register type i32, we should truncate Arg back to
// i1/i8/i16.
- if (Arg.getValueType().isSimple() &&
- Arg.getValueType().getSimpleVT() == MVT::i32 &&
- (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 ||
- VA.getLocVT() == MVT::i16))
- Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg);
+ if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
+ VA.getValVT() == MVT::i16)
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
SDValue Store =
DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0);
@@ -4116,6 +4108,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
@@ -4164,35 +4157,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
int VEXTOffsets[2] = { 0, 0 };
+ int OffsetMultipliers[2] = { 1, 1 };
// This loop extracts the usage patterns of the source vectors
// and prepares appropriate SDValues for a shuffle if possible.
for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- if (SourceVecs[i].getValueType() == VT) {
+ unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
+ SDValue CurSource = SourceVecs[i];
+ if (SourceVecs[i].getValueType().getVectorElementType() !=
+ VT.getVectorElementType()) {
+ // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
+ // Then bitcast it to the vector which holds asserted element type,
+ // and record the multiplier of element width between SourceVecs and
+ // Build_vector which is needed to extract the correct lanes later.
+ EVT CastVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ SourceVecs[i].getValueSizeInBits() /
+ VT.getVectorElementType().getSizeInBits());
+
+ CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
+ OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
+ NumSrcElts *= OffsetMultipliers[i];
+ MaxElts[i] *= OffsetMultipliers[i];
+ MinElts[i] *= OffsetMultipliers[i];
+ }
+
+ if (CurSource.getValueType() == VT) {
// No VEXT necessary
- ShuffleSrcs[i] = SourceVecs[i];
+ ShuffleSrcs[i] = CurSource;
VEXTOffsets[i] = 0;
continue;
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ } else if (NumSrcElts < NumElts) {
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
- ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
- DAG.getUNDEF(SourceVecs[i].getValueType()));
+ ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
+ DAG.getUNDEF(CurSource.getValueType()));
continue;
}
- // Don't attempt to extract subvectors from BUILD_VECTOR sources
- // that expand or trunc the original value.
- // TODO: We can try to bitcast and ANY_EXTEND the result but
- // we need to consider the cost of vector ANY_EXTEND, and the
- // legality of all the types.
- if (SourceVecs[i].getValueType().getVectorElementType() !=
- VT.getVectorElementType())
- return SDValue();
-
// Since only 64-bit and 128-bit vectors are legal on ARM and
// we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
+ assert(NumSrcElts == 2 * NumElts &&
"unexpected vector sizes in ReconstructShuffle");
if (MaxElts[i] - MinElts[i] >= NumElts) {
@@ -4203,22 +4208,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (MinElts[i] >= NumElts) {
// The extraction can just take the second half
VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
} else if (MaxElts[i] < NumElts) {
// The extraction can just take the first half
VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
} else {
// An actual VEXT is needed
VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
- SDValue VEXTSrc2 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
DAG.getConstant(Imm, MVT::i32));
@@ -4238,9 +4241,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int ExtractElt =
cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
} else {
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
+ VEXTOffsets[1]);
}
}
@@ -5177,11 +5181,37 @@ FailedModImm:
return Op;
}
+// Normalize the operands of BUILD_VECTOR. The value of constant operands will
+// be truncated to fit element width.
+static SDValue NormalizeBuildVector(SDValue Op,
+ SelectionDAG &DAG) {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ EVT EltTy= VT.getVectorElementType();
+
+ if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
+ return Op;
+
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) {
+ SDValue Lane = Op.getOperand(I);
+ if (Lane.getOpcode() == ISD::Constant) {
+ APInt LowBits(EltTy.getSizeInBits(),
+ cast<ConstantSDNode>(Lane)->getZExtValue());
+ Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32);
+ }
+ Ops.push_back(Lane);
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+}
+
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc dl(Op);
EVT VT = Op.getValueType();
+ Op = NormalizeBuildVector(Op, DAG);
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
@@ -6047,18 +6077,14 @@ bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
- if (!VT1.isInteger() || !VT2.isInteger())
+ if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
@@ -6068,18 +6094,14 @@ bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 == 32 && NumBits2 == 64)
- return true;
- return false;
+ return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
- if (!VT1.isInteger() || !VT2.isInteger())
+ if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 == 32 && NumBits2 == 64)
- return true;
- return false;
+ return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
@@ -6092,8 +6114,9 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
// 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
- return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() &&
- VT2.isInteger() && VT1.getSizeInBits() <= 32);
+ return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
+ VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
+ VT1.getSizeInBits() <= 32);
}
bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
@@ -6346,23 +6369,45 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
APInt Value = C->getAPIntValue();
EVT VT = N->getValueType(0);
- APInt VP1 = Value + 1;
- if (VP1.isPowerOf2()) {
- // Multiplying by one less than a power of two, replace with a shift
- // and a subtract.
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
- }
- APInt VM1 = Value - 1;
- if (VM1.isPowerOf2()) {
- // Multiplying by one more than a power of two, replace with a shift
- // and an add.
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VM1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+ if (Value.isNonNegative()) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ APInt VM1 = Value - 1;
+ if (VM1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VM1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal,
+ N->getOperand(0));
+ }
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ APInt VP1 = Value + 1;
+ if (VP1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal,
+ N->getOperand(0));
+ }
+ } else {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ APInt VNM1 = -Value - 1;
+ if (VNM1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VNM1.logBase2(), MVT::i64));
+ SDValue Add =
+ DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
+ }
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ APInt VNP1 = -Value + 1;
+ if (VNP1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VNP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+ ShiftedVal);
+ }
}
}
return SDValue();
@@ -6687,7 +6732,7 @@ static SDValue tryCombineFixedPointConvert(SDNode *N,
else if (Vec.getValueType() == MVT::v2i64)
VecResTy = MVT::v2f64;
else
- assert(0 && "unexpected vector type!");
+ llvm_unreachable("unexpected vector type!");
SDValue Convert =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
@@ -7020,7 +7065,7 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
DAG.getConstant(-ShiftAmount, MVT::i32));
- else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits)
+ else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits)
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
DAG.getConstant(ShiftAmount, MVT::i32));
@@ -7867,6 +7912,18 @@ bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
return Inst->getType()->getPrimitiveSizeInBits() <= 128;
}
+TargetLoweringBase::LegalizeTypeAction
+AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
+ MVT SVT = VT.getSimpleVT();
+ // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
+ // v4i16, v2i32 instead of to promote.
+ if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
+ || SVT == MVT::v1f32)
+ return TypeWidenVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index de16c4d..cb0b9ef 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -197,7 +197,7 @@ class AArch64TargetLowering : public TargetLowering {
bool RequireStrictAlign;
public:
- explicit AArch64TargetLowering(AArch64TargetMachine &TM);
+ explicit AArch64TargetLowering(TargetMachine &TM);
/// Selects the correct CCAssignFn for a the given CallingConvention
/// value.
@@ -324,6 +324,9 @@ public:
bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(EVT VT) const override;
+
private:
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index d455d7e..5007172 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -448,13 +448,19 @@ def logical_imm64_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(enc, MVT::i32);
}]>;
-def LogicalImm32Operand : AsmOperandClass {
- let Name = "LogicalImm32";
- let DiagnosticType = "LogicalSecondSource";
-}
-def LogicalImm64Operand : AsmOperandClass {
- let Name = "LogicalImm64";
- let DiagnosticType = "LogicalSecondSource";
+let DiagnosticType = "LogicalSecondSource" in {
+ def LogicalImm32Operand : AsmOperandClass {
+ let Name = "LogicalImm32";
+ }
+ def LogicalImm64Operand : AsmOperandClass {
+ let Name = "LogicalImm64";
+ }
+ def LogicalImm32NotOperand : AsmOperandClass {
+ let Name = "LogicalImm32Not";
+ }
+ def LogicalImm64NotOperand : AsmOperandClass {
+ let Name = "LogicalImm64Not";
+ }
}
def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{
return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32);
@@ -468,6 +474,12 @@ def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{
let PrintMethod = "printLogicalImm64";
let ParserMatchClass = LogicalImm64Operand;
}
+def logical_imm32_not : Operand<i32> {
+ let ParserMatchClass = LogicalImm32NotOperand;
+}
+def logical_imm64_not : Operand<i64> {
+ let ParserMatchClass = LogicalImm64NotOperand;
+}
// imm0_65535 predicate - True if the immediate is in the range [0,65535].
def Imm0_65535Operand : AsmImmRange<0, 65535>;
@@ -963,8 +975,14 @@ def ccode : Operand<i32> {
let ParserMatchClass = CondCode;
}
def inv_ccode : Operand<i32> {
+ // AL and NV are invalid in the aliases which use inv_ccode
let PrintMethod = "printInverseCondCode";
let ParserMatchClass = CondCode;
+ let MCOperandPredicate = [{
+ return MCOp.isImm() &&
+ MCOp.getImm() != AArch64CC::AL &&
+ MCOp.getImm() != AArch64CC::NV;
+ }];
}
// Conditional branch target. 19-bit immediate. The low two bits of the target
@@ -1323,13 +1341,13 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
[(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
- Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
+ Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 0;
}
def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
[(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
- Sched<[WriteIM64, ReadIMA, ReadIM, ReadIM]> {
+ Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
}
@@ -1339,7 +1357,7 @@ class WideMulAccum<bit isSub, bits<3> opc, string asm,
: BaseMulAccum<isSub, opc, GPR32, GPR64, asm,
[(set GPR64:$Rd, (AccNode GPR64:$Ra,
(mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>,
- Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
+ Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
@@ -1738,6 +1756,10 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
WZR, GPR32:$src1, GPR32:$src2, 0), 5>;
def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrs")
XZR, GPR64:$src1, GPR64:$src2, 0), 5>;
+ def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrx")
+ WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>;
+ def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrx64")
+ XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>;
// Register/register aliases with no shift when SP is not used.
def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
@@ -1925,22 +1947,32 @@ class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype>
: InstAlias<asm#" $dst, $src1, $src2",
(inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>;
-let AddedComplexity = 6 in
-multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode> {
+multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
+ string Alias> {
+ let AddedComplexity = 6 in
def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic,
[(set GPR32sp:$Rd, (OpNode GPR32:$Rn,
logical_imm32:$imm))]> {
let Inst{31} = 0;
let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
}
+ let AddedComplexity = 6 in
def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic,
[(set GPR64sp:$Rd, (OpNode GPR64:$Rn,
logical_imm64:$imm))]> {
let Inst{31} = 1;
}
+
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn,
+ logical_imm32_not:$imm), 0>;
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn,
+ logical_imm64_not:$imm), 0>;
}
-multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> {
+multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode,
+ string Alias> {
let isCompare = 1, Defs = [NZCV] in {
def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic,
[(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> {
@@ -1952,6 +1984,13 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> {
let Inst{31} = 1;
}
} // end Defs = [NZCV]
+
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn,
+ logical_imm32_not:$imm), 0>;
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn,
+ logical_imm64_not:$imm), 0>;
}
class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index ff115c0..ce85b2c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -35,8 +35,14 @@ AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const MCInstrDesc &Desc = MI->getDesc();
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ if (MI->getOpcode() == AArch64::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ const MCInstrDesc &Desc = MI->getDesc();
switch (Desc.getOpcode()) {
default:
// Anything not explicitly designated otherwise is a nomal 4-byte insn.
@@ -1224,7 +1230,7 @@ void AArch64InstrInfo::copyPhysRegTuple(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
llvm::ArrayRef<unsigned> Indices) const {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
@@ -1385,7 +1391,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -1406,7 +1412,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
@@ -1423,7 +1429,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
@@ -1440,7 +1446,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
@@ -1461,7 +1467,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
@@ -1577,39 +1583,39 @@ void AArch64InstrInfo::storeRegToStackSlot(
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::STRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Twov1d, Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Threev1d, Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Fourv1d, Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d, Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d, Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d, Offset = false;
}
@@ -1675,39 +1681,39 @@ void AArch64InstrInfo::loadRegFromStackSlot(
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Twov1d, Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Threev1d, Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Fourv1d, Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d, Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d, Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d, Offset = false;
}
@@ -1726,7 +1732,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg, int Offset,
- const AArch64InstrInfo *TII,
+ const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV) {
if (DestReg == SrcReg && Offset == 0)
return;
@@ -1835,7 +1841,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
*OutUnscaledOp = 0;
switch (MI.getOpcode()) {
default:
- assert(0 && "unhandled opcode in rewriteAArch64FrameIndex");
+ llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
// Vector spills/fills can't take an immediate offset.
case AArch64::LD1Twov2d:
case AArch64::LD1Threev2d:
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 90ce75f..f70b82b 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -44,8 +44,6 @@ public:
/// always be able to get register info as well (through this method).
const AArch64RegisterInfo &getRegisterInfo() const { return RI; }
- const AArch64Subtarget &getSubTarget() const { return Subtarget; }
-
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
@@ -168,7 +166,7 @@ private:
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
- const AArch64InstrInfo *TII,
+ const TargetInstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
bool SetNZCV = false);
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 9ad36e8..1211fba 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -323,7 +323,7 @@ def : Pat<(AArch64LOADgot tconstpool:$addr),
// System instructions.
//===----------------------------------------------------------------------===//
-def HINT : HintI<"hint">;
+def HINT : HintI<"hint">;
def : InstAlias<"nop", (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe", (HINT 0b010)>;
@@ -671,10 +671,10 @@ def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
//===----------------------------------------------------------------------===//
// (immediate)
-defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>;
-defm AND : LogicalImm<0b00, "and", and>;
-defm EOR : LogicalImm<0b10, "eor", xor>;
-defm ORR : LogicalImm<0b01, "orr", or>;
+defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
+defm AND : LogicalImm<0b00, "and", and, "bic">;
+defm EOR : LogicalImm<0b10, "eor", xor, "eon">;
+defm ORR : LogicalImm<0b01, "orr", or, "orn">;
// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
@@ -737,6 +737,10 @@ def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
defm CLS : OneOperandData<0b101, "cls">;
defm CLZ : OneOperandData<0b100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000, "rbit">;
+
+def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>;
+def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>;
+
def REV16Wr : OneWRegData<0b001, "rev16",
UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
@@ -2238,6 +2242,81 @@ def : Pat<(f32_to_f16 FPR32:$Rn),
def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
[(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
+// When converting from f16 coming directly from a load, make sure we
+// load into the FPR16 registers rather than going through the GPRs.
+// f16->f32
+def : Pat<(f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))))),
+ (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))))),
+ (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f32 (f16_to_f32 (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f32 (f16_to_f32 (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+// f16->f64
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))))))),
+ (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))))))),
+ (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
+ (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
+ (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+// When converting to f16 going directly to a store, make sure we use the
+// appropriate direct conversion instructions and store via the FPR16
+// registers rather than going through the GPRs.
+let AddedComplexity = 10 in {
+// f32->f16
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)>;
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
+// f64->f16
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)>;
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
+}
+
+
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index e7454be..3df9c4f 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -40,14 +40,13 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
-static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", cl::init(20),
- cl::Hidden);
+static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
+ cl::init(20), cl::Hidden);
// Place holder while testing unscaled load/store combining
-static cl::opt<bool>
-EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden,
- cl::desc("Allow AArch64 unscaled load/store combining"),
- cl::init(true));
+static cl::opt<bool> EnableAArch64UnscaledMemOp(
+ "aarch64-unscaled-mem-op", cl::Hidden,
+ cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
namespace {
struct AArch64LoadStoreOpt : public MachineFunctionPass {
@@ -60,19 +59,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
- // If a matching instruction is found, mergeForward is set to true if the
+ // If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &mergeForward,
+ bool &MergeForward,
unsigned Limit);
// Merge the two instructions indicated into a single pair-wise instruction.
- // If mergeForward is true, erase the first instruction and fold its
+ // If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool mergeForward);
+ MachineBasicBlock::iterator Paired, bool MergeForward);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -142,7 +141,7 @@ static bool isUnscaledLdst(unsigned Opc) {
int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
switch (MemMI->getOpcode()) {
default:
- llvm_unreachable("Opcode has has unknown size!");
+ llvm_unreachable("Opcode has unknown size!");
case AArch64::STRSui:
case AArch64::STURSi:
return 4;
@@ -217,16 +216,26 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no pre-indexed equivalent!");
- case AArch64::STRSui: return AArch64::STRSpre;
- case AArch64::STRDui: return AArch64::STRDpre;
- case AArch64::STRQui: return AArch64::STRQpre;
- case AArch64::STRWui: return AArch64::STRWpre;
- case AArch64::STRXui: return AArch64::STRXpre;
- case AArch64::LDRSui: return AArch64::LDRSpre;
- case AArch64::LDRDui: return AArch64::LDRDpre;
- case AArch64::LDRQui: return AArch64::LDRQpre;
- case AArch64::LDRWui: return AArch64::LDRWpre;
- case AArch64::LDRXui: return AArch64::LDRXpre;
+ case AArch64::STRSui:
+ return AArch64::STRSpre;
+ case AArch64::STRDui:
+ return AArch64::STRDpre;
+ case AArch64::STRQui:
+ return AArch64::STRQpre;
+ case AArch64::STRWui:
+ return AArch64::STRWpre;
+ case AArch64::STRXui:
+ return AArch64::STRXpre;
+ case AArch64::LDRSui:
+ return AArch64::LDRSpre;
+ case AArch64::LDRDui:
+ return AArch64::LDRDpre;
+ case AArch64::LDRQui:
+ return AArch64::LDRQpre;
+ case AArch64::LDRWui:
+ return AArch64::LDRWpre;
+ case AArch64::LDRXui:
+ return AArch64::LDRXpre;
}
}
@@ -260,7 +269,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool mergeForward) {
+ bool MergeForward) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -276,12 +285,12 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
// Insert our new paired instruction after whichever of the paired
- // instructions mergeForward indicates.
- MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I;
- // Also based on mergeForward is from where we copy the base register operand
+ // instructions MergeForward indicates.
+ MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+ // Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
MachineOperand &BaseRegOp =
- mergeForward ? Paired->getOperand(1) : I->getOperand(1);
+ MergeForward ? Paired->getOperand(1) : I->getOperand(1);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI, *Rt2MI;
@@ -355,8 +364,8 @@ static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
if (IsUnscaled) {
// Convert the byte-offset used by unscaled into an "element" offset used
// by the scaled pair load/store instructions.
- int elemOffset = Offset / OffsetStride;
- if (elemOffset > 63 || elemOffset < -64)
+ int ElemOffset = Offset / OffsetStride;
+ if (ElemOffset > 63 || ElemOffset < -64)
return false;
}
return true;
@@ -374,14 +383,14 @@ static int alignTo(int Num, int PowOf2) {
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &mergeForward, unsigned Limit) {
+ bool &MergeForward, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
++MBBI;
int Opc = FirstMI->getOpcode();
- bool mayLoad = FirstMI->mayLoad();
+ bool MayLoad = FirstMI->mayLoad();
bool IsUnscaled = isUnscaledLdst(Opc);
unsigned Reg = FirstMI->getOperand(0).getReg();
unsigned BaseReg = FirstMI->getOperand(1).getReg();
@@ -453,7 +462,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// If the destination register of the loads is the same register, bail
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
- if (mayLoad && Reg == MI->getOperand(0).getReg()) {
+ if (MayLoad && Reg == MI->getOperand(0).getReg()) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
continue;
}
@@ -462,7 +471,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// the two instructions, we can combine the second into the first.
if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
!UsedRegs[MI->getOperand(0).getReg()]) {
- mergeForward = false;
+ MergeForward = false;
return MBBI;
}
@@ -471,7 +480,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// second.
if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
!UsedRegs[FirstMI->getOperand(0).getReg()]) {
- mergeForward = true;
+ MergeForward = true;
return MBBI;
}
// Unable to combine these instructions due to interference in between.
@@ -798,14 +807,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
break;
}
// Look ahead up to ScanLimit instructions for a pairable instruction.
- bool mergeForward = false;
+ bool MergeForward = false;
MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, mergeForward, ScanLimit);
+ findMatchingInsn(MBBI, MergeForward, ScanLimit);
if (Paired != E) {
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, mergeForward);
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
Modified = true;
++NumPairCreated;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index ab6d375..75a17b9 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -51,7 +51,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
AArch64II::MO_PAGEOFF)
RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF;
else
- assert(0 && "Unexpected target flags with MO_GOT on GV operand");
+ llvm_unreachable("Unexpected target flags with MO_GOT on GV operand");
} else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) {
if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
RefKind = MCSymbolRefExpr::VK_TLVPPAGE;
@@ -154,7 +154,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
MCOperand &MCOp) const {
switch (MO.getType()) {
default:
- assert(0 && "unknown operand type");
+ llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit())
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index 21c927f..a30e4ad 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -175,7 +175,7 @@ def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>;
// This is for indirect tail calls to store the address of the destination.
def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21,
X22, X23, X24, X25, X26,
- X27, X28)>;
+ X27, X28, FP, LR)>;
// GPR register classes for post increment amount of vector load/store that
// has alternate printing when Rm=31 and prints a constant immediate value
diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td
index 0c3949e..d709bee 100644
--- a/lib/Target/AArch64/AArch64SchedA53.td
+++ b/lib/Target/AArch64/AArch64SchedA53.td
@@ -148,9 +148,9 @@ def : ReadAdvance<ReadVLD, 0>;
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
// operands are needed one cycle later if and only if they are to be
-// shifted. Otherwise, they too are needed two cycle later. This same
+// shifted. Otherwise, they too are needed two cycles later. This same
// ReadAdvance applies to Extended registers as well, even though there is
-// a seperate SchedPredicate for them.
+// a separate SchedPredicate for them.
def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
WriteISReg, WriteIEReg,WriteIS,
WriteID32,WriteID64,
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
new file mode 100644
index 0000000..8209f96
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -0,0 +1,304 @@
+//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ARM Cortex-A57 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexA57Model : SchedMachineModel {
+ let IssueWidth = 8; // 3-way decode and 8-way issue
+ let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Cortex-A57.
+// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where
+// micro-ops wait for their operands and then issue out-of-order.
+
+def A57UnitB : ProcResource<1> { let BufferSize = 8; } // Type B micro-ops
+def A57UnitI : ProcResource<2> { let BufferSize = 8; } // Type I micro-ops
+def A57UnitM : ProcResource<1> { let BufferSize = 8; } // Type M micro-ops
+def A57UnitL : ProcResource<1> { let BufferSize = 8; } // Type L micro-ops
+def A57UnitS : ProcResource<1> { let BufferSize = 8; } // Type S micro-ops
+def A57UnitX : ProcResource<1> { let BufferSize = 8; } // Type X micro-ops
+def A57UnitW : ProcResource<1> { let BufferSize = 8; } // Type W micro-ops
+let SchedModel = CortexA57Model in {
+ def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
+}
+
+
+let SchedModel = CortexA57Model in {
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Cortex-A57.
+
+include "AArch64SchedA57WriteRes.td"
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Cortex-A57. The Cortex-A57 types are directly associated with resources, so
+// defining the aliases precludes the need for mapping them using WriteRes. The
+// aliases are sufficient for creating a coarse, working model. As the model
+// evolves, InstRWs will be used to override these SchedAliases.
+
+def : SchedAlias<WriteImm, A57Write_1cyc_1I>;
+def : SchedAlias<WriteI, A57Write_1cyc_1I>;
+def : SchedAlias<WriteISReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteIEReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteExtr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteIS, A57Write_1cyc_1I>;
+def : SchedAlias<WriteID32, A57Write_19cyc_1M>;
+def : SchedAlias<WriteID64, A57Write_35cyc_1M>;
+def : SchedAlias<WriteIM32, A57Write_3cyc_1M>;
+def : SchedAlias<WriteIM64, A57Write_5cyc_1M>;
+def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
+def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>;
+def : SchedAlias<WriteLD, A57Write_4cyc_1L>;
+def : SchedAlias<WriteST, A57Write_1cyc_1S>;
+def : SchedAlias<WriteSTP, A57Write_1cyc_1S>;
+def : SchedAlias<WriteAdr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteLDIdx, A57Write_4cyc_1I_1L>;
+def : SchedAlias<WriteSTIdx, A57Write_1cyc_1I_1S>;
+def : SchedAlias<WriteF, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFCopy, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFDiv, A57Write_18cyc_1X>;
+def : SchedAlias<WriteV, A57Write_3cyc_1V>;
+def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
+def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+// Forwarding logic is not [yet] explicitly modeled beyond what is captured
+// in the latencies of the A57 Generic SchedWriteRes's.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the modeled is refined, this will override most
+// of the above ShchedAlias mappings.
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>;
+def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>;
+
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+
+// Multiply high
+def : InstRW<[A57Write_6cyc_1M], (instrs SMULHrr, UMULHrr)>;
+
+
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1I], (instrs EXTRWrri)>;
+def : InstRW<[A57Write_3cyc_1I_1M], (instrs EXTRXrri)>;
+def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
+
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_3cyc_1W], (instregex "CRC32")>;
+
+
+// Vector Load
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>;
+def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
+def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4i(32)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>;
+def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+// Vector Store
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>;
+def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>;
+
+def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>;
+def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST2Twov(2d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>;
+def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>;
+def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>;
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>;
+def : InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>;
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST4i(64)_POST$")>;
+
+def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+} // SchedModel = CortexA57Model
diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
new file mode 100644
index 0000000..a8f421b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -0,0 +1,512 @@
+//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming conventions is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: A57Write
+// Latency: #cyc
+// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+//
+// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
+// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; }
+def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
+def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
+def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; }
+def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
+def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; }
+def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
+def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
+def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
+def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
+def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
+def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 64;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 36;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1M_2S : SchedWriteRes<[A57UnitM,
+ A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_3S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_2S_1V : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_5cyc_1I_2L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def A57Write_6cyc_1I_2L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def A57Write_6cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def A57Write_7cyc_3L : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1I_1L_1V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1L_2V : SchedWriteRes<[A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_9cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 4 micro-op types
+
+def A57Write_2cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_1I_3S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_1I_2S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_4cyc_4S : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def A57Write_7cyc_1I_3L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL, A57UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+def A57Write_5cyc_2I_2L : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+def A57Write_8cyc_1I_1L_2V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def A57Write_8cyc_4L : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def A57Write_9cyc_2L_2V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 5 micro-op types
+
+def A57Write_3cyc_3S_2V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 5;
+}
+def A57Write_8cyc_1I_4L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+def A57Write_4cyc_1I_4S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_1I_2L_2V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_1I_1L_3V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 6 micro-op types
+
+def A57Write_3cyc_1I_3S_2V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 6;
+}
+def A57Write_4cyc_2I_4S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+def A57Write_4cyc_4S_2V : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+def A57Write_6cyc_6S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_1I_2L_3V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_1I_1L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_2L_4V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 7 micro-op types
+
+def A57Write_10cyc_3L_4V : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 7;
+}
+def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 7;
+}
+def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 7;
+}
+def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 7;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 8 micro-op types
+
+def A57Write_10cyc_1I_3L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 8;
+}
+def A57Write_11cyc_4L_4V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 8;
+}
+def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 9 micro-op types
+
+def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 8;
+ let NumMicroOps = 9;
+}
+def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 9;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 10 micro-op types
+
+def A57Write_6cyc_6S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 10;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 11 micro-op types
+
+def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 11;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 12 micro-op types
+
+def A57Write_8cyc_8S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 12;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 13 micro-op types
+
+def A57Write_8cyc_1I_8S_4V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 13;
+}
+
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 5c65b75..1bf64fc 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -16,9 +16,8 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-selectiondag-info"
-AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const TargetMachine &TM)
- : TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {}
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const DataLayout *DL)
+ : TargetSelectionDAGInfo(DL) {}
AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {}
@@ -30,7 +29,9 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
const char *bzeroEntry =
- (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : nullptr;
+ (V && V->isNullValue())
+ ? DAG.getTarget().getSubtarget<AArch64Subtarget>().getBZeroEntry()
+ : nullptr;
// For small size (< 256), it is not beneficial to use bzero
// instead of memset.
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
@@ -50,7 +51,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 8381f99..1180eea 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -19,12 +19,8 @@
namespace llvm {
class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
- /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const AArch64Subtarget *Subtarget;
-
public:
- explicit AArch64SelectionDAGInfo(const TargetMachine &TM);
+ explicit AArch64SelectionDAGInfo(const DataLayout *DL);
~AArch64SelectionDAGInfo();
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index cd69994..bb0b72c 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -30,21 +30,35 @@ static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
"converter pass"), cl::init(true), cl::Hidden);
-AArch64Subtarget::AArch64Subtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS, bool LittleEndian)
- : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
- TargetTriple(TT), IsLittleEndian(LittleEndian) {
+AArch64Subtarget &
+AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
// Determine default and user-specified characteristics
if (CPUString.empty())
CPUString = "generic";
ParseSubtargetFeatures(CPUString, FS);
+ return *this;
}
+AArch64Subtarget::AArch64Subtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, TargetMachine &TM,
+ bool LittleEndian)
+ : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
+ TargetTriple(TT),
+ // This nested ternary is horrible, but DL needs to be properly
+ // initialized
+ // before TLInfo is constructed.
+ DL(isTargetMachO()
+ ? "e-m:o-i64:64-i128:128-n32:64-S128"
+ : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
+ : "E-m:e-i64:64-i128:128-n32:64-S128")),
+ FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)),
+ TSInfo(&DL), TLInfo(TM) {}
+
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
unsigned char
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 590ea05..52124f6 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -14,8 +14,13 @@
#ifndef AArch64SUBTARGET_H
#define AArch64SUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64ISelLowering.h"
#include "AArch64RegisterInfo.h"
+#include "AArch64SelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -49,15 +54,32 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
- /// IsLittleEndian - Is the target little endian?
- bool IsLittleEndian;
+ const DataLayout DL;
+ AArch64FrameLowering FrameLowering;
+ AArch64InstrInfo InstrInfo;
+ AArch64SelectionDAGInfo TSInfo;
+ AArch64TargetLowering TLInfo;
+private:
+ /// initializeSubtargetDependencies - Initializes using CPUString and the
+ /// passed in feature string so that we can use initializer lists for
+ /// subtarget initialization.
+ AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
AArch64Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool LittleEndian);
-
+ const std::string &FS, TargetMachine &TM, bool LittleEndian);
+
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const AArch64FrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ const AArch64TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ const AArch64InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
bool enableMachineScheduler() const override { return true; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -69,7 +91,7 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool isLittleEndian() const { return IsLittleEndian; }
+ bool isLittleEndian() const { return DL.isLittleEndian(); }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 0b5dd2f..f99b90b 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -53,6 +53,12 @@ static cl::opt<bool>
EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair"
" optimization pass"), cl::init(true), cl::Hidden);
+static cl::opt<bool>
+EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden,
+ cl::desc("Run SimplifyCFG after expanding atomic operations"
+ " to make use of cmpxchg flow-based information"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@@ -71,16 +77,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool LittleEndian)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, LittleEndian),
- // This nested ternary is horrible, but DL needs to be properly
- // initialized
- // before TLInfo is constructed.
- DL(Subtarget.isTargetMachO()
- ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
- : "E-m:e-i64:64-i128:128-n32:64-S128")),
- InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget),
- TSInfo(*this) {
+ Subtarget(TT, CPU, FS, *this, LittleEndian) {
initAsmInfo();
}
@@ -113,6 +110,7 @@ public:
return getTM<AArch64TargetMachine>();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addILPOpts() override;
@@ -135,6 +133,20 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
return new AArch64PassConfig(this, PM);
}
+void AArch64PassConfig::addIRPasses() {
+ // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
+ // ourselves.
+ addPass(createAtomicExpandLoadLinkedPass(TM));
+
+ // Cmpxchg instructions are often used with a subsequent comparison to
+ // determine whether it succeeded. We can exploit existing control-flow in
+ // ldrex/strex loops to simplify this, but it needs tidying up.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ addPass(createCFGSimplificationPass());
+
+ TargetPassConfig::addIRPasses();
+}
+
// Pass Pipeline Configuration
bool AArch64PassConfig::addPreISel() {
// Run promote constant before global merge, so that the promoted constants
@@ -146,10 +158,6 @@ bool AArch64PassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
- // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
- // ourselves.
- addPass(createAtomicExpandLoadLinkedPass(TM));
-
return false;
}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 079b19b..852cb3f 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -15,13 +15,9 @@
#define AArch64TARGETMACHINE_H
#include "AArch64InstrInfo.h"
-#include "AArch64ISelLowering.h"
#include "AArch64Subtarget.h"
-#include "AArch64FrameLowering.h"
-#include "AArch64SelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/MC/MCStreamer.h"
namespace llvm {
@@ -29,13 +25,6 @@ class AArch64TargetMachine : public LLVMTargetMachine {
protected:
AArch64Subtarget Subtarget;
-private:
- const DataLayout DL;
- AArch64InstrInfo InstrInfo;
- AArch64TargetLowering TLInfo;
- AArch64FrameLowering FrameLowering;
- AArch64SelectionDAGInfo TSInfo;
-
public:
AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -46,18 +35,22 @@ public:
return &Subtarget;
}
const AArch64TargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
- const DataLayout *getDataLayout() const override { return &DL; }
const AArch64FrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ const AArch64InstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
}
- const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
const AArch64RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return &getInstrInfo()->getRegisterInfo();
}
const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
// Pass Pipeline Configuration
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 33e482a..1dac14b 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -306,28 +306,64 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
// LowerVectorINT_TO_FP:
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+
+ // Complex: to v2f32
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
+
+ // Complex: to v4f32
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+
+ // Complex: to v2f64
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+
// LowerVectorFP_TO_INT
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
- { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
- { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
+
+ // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
+
+ // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
+
+ // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
};
int Idx = ConvertCostTableLookup<MVT>(
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 65b77c5..c42d11e 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -38,14 +38,19 @@ namespace {
class AArch64Operand;
class AArch64AsmParser : public MCTargetAsmParser {
-public:
- typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector;
-
private:
StringRef Mnemonic; ///< Instruction mnemonic.
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ // Map of register aliases registers via the .req directive.
+ StringMap<std::pair<bool, unsigned> > RegisterReqs;
+
+ AArch64TargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+ return static_cast<AArch64TargetStreamer &>(TS);
+ }
+
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -54,6 +59,7 @@ private:
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
bool parseCondCode(OperandVector &Operands, bool invertCondCode);
+ unsigned matchRegisterNameAlias(StringRef Name, bool isVector);
int tryParseRegister();
int tryMatchVectorRegister(StringRef &Kind, bool expected);
bool parseRegister(OperandVector &Operands);
@@ -70,6 +76,10 @@ private:
bool parseDirectiveTLSDescCall(SMLoc L);
bool parseDirectiveLOH(StringRef LOH, SMLoc L);
+ bool parseDirectiveLtorg(SMLoc L);
+
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -108,6 +118,8 @@ public:
const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
+ if (Parser.getStreamer().getTargetStreamer() == nullptr)
+ new AArch64TargetStreamer(Parser.getStreamer());
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -117,7 +129,7 @@ public:
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
static bool classifySymbolRef(const MCExpr *Expr,
@@ -240,10 +252,10 @@ private:
// the add<>Operands() calls.
MCContext &Ctx;
+public:
AArch64Operand(KindTy K, MCContext &_Ctx)
: MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
-public:
AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
Kind = o.Kind;
StartLoc = o.StartLoc;
@@ -607,7 +619,11 @@ public:
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
return false;
- return AArch64_AM::isLogicalImmediate(MCE->getValue(), 32);
+ int64_t Val = MCE->getValue();
+ if (Val >> 32 != 0 && Val >> 32 != ~0LL)
+ return false;
+ Val &= 0xFFFFFFFF;
+ return AArch64_AM::isLogicalImmediate(Val, 32);
}
bool isLogicalImm64() const {
if (!isImm())
@@ -617,6 +633,23 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64);
}
+ bool isLogicalImm32Not() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
+ return AArch64_AM::isLogicalImmediate(Val, 32);
+ }
+ bool isLogicalImm64Not() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64);
+ }
bool isShiftedImm() const { return Kind == k_ShiftedImm; }
bool isAddSubImm() const {
if (!isShiftedImm() && !isImm())
@@ -1348,7 +1381,8 @@ public:
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
assert(MCE && "Invalid logical immediate operand!");
- uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 32);
+ uint64_t encoding =
+ AArch64_AM::encodeLogicalImmediate(MCE->getValue() & 0xFFFFFFFF, 32);
Inst.addOperand(MCOperand::CreateImm(encoding));
}
@@ -1360,6 +1394,22 @@ public:
Inst.addOperand(MCOperand::CreateImm(encoding));
}
+ void addLogicalImm32NotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
+ uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, 32);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
+
+ void addLogicalImm64NotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ uint64_t encoding =
+ AArch64_AM::encodeLogicalImmediate(~MCE->getValue(), 64);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
+
void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
@@ -1523,9 +1573,9 @@ public:
void print(raw_ostream &OS) const override;
- static AArch64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Token, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Token, Ctx);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->Tok.IsSuffix = IsSuffix;
@@ -1534,9 +1584,9 @@ public:
return Op;
}
- static AArch64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Register, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Register, Ctx);
Op->Reg.RegNum = RegNum;
Op->Reg.isVector = isVector;
Op->StartLoc = S;
@@ -1544,10 +1594,10 @@ public:
return Op;
}
- static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
- unsigned NumElements, char ElementKind,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_VectorList, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements,
+ char ElementKind, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_VectorList, Ctx);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.NumElements = NumElements;
@@ -1557,28 +1607,29 @@ public:
return Op;
}
- static AArch64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_VectorIndex, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_VectorIndex, Ctx);
Op->VectorIndex.Val = Idx;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Immediate, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Immediate, Ctx);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateShiftedImm(const MCExpr *Val,
- unsigned ShiftAmount, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_ShiftedImm, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateShiftedImm(const MCExpr *Val,
+ unsigned ShiftAmount,
+ SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_ShiftedImm, Ctx);
Op->ShiftedImm .Val = Val;
Op->ShiftedImm.ShiftAmount = ShiftAmount;
Op->StartLoc = S;
@@ -1586,34 +1637,36 @@ public:
return Op;
}
- static AArch64Operand *CreateCondCode(AArch64CC::CondCode Code, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_CondCode, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateCondCode(AArch64CC::CondCode Code, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_CondCode, Ctx);
Op->CondCode.Code = Code;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_FPImm, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateFPImm(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_FPImm, Ctx);
Op->FPImm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Barrier, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S,
- uint64_t FeatureBits, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_SysReg, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
Op->SysReg.FeatureBits = FeatureBits;
@@ -1622,27 +1675,28 @@ public:
return Op;
}
- static AArch64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_SysCR, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateSysCR(unsigned Val, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_SysCR, Ctx);
Op->SysCRImm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Prefetch, Ctx);
+ static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp,
- unsigned Val, bool HasExplicitAmount,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
+ bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_ShiftExtend, Ctx);
Op->ShiftExtend.Type = ShOp;
Op->ShiftExtend.Amount = Val;
Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount;
@@ -1816,6 +1870,26 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return (RegNo == (unsigned)-1);
}
+// Matches a register name or register alias previously defined by '.req'
+unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
+ bool isVector) {
+ unsigned RegNum = isVector ? matchVectorRegName(Name)
+ : MatchRegisterName(Name);
+
+ if (RegNum == 0) {
+ // Check for aliases registered via .req. Canonicalize to lower case.
+ // That's more consistent since register names are case insensitive, and
+ // it's how the original entry was passed in from MC/MCParser/AsmParser.
+ auto Entry = RegisterReqs.find(Name.lower());
+ if (Entry == RegisterReqs.end())
+ return 0;
+ // set RegNum if the match is the right kind of register
+ if (isVector == Entry->getValue().first)
+ RegNum = Entry->getValue().second;
+ }
+ return RegNum;
+}
+
/// tryParseRegister - Try to parse a register name. The token must be an
/// Identifier when called, and if it is a register name the token is eaten and
/// the register is added to the operand list.
@@ -1824,7 +1898,7 @@ int AArch64AsmParser::tryParseRegister() {
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
std::string lowerCase = Tok.getString().lower();
- unsigned RegNum = MatchRegisterName(lowerCase);
+ unsigned RegNum = matchRegisterNameAlias(lowerCase, false);
// Also handle a few aliases of registers.
if (RegNum == 0)
RegNum = StringSwitch<unsigned>(lowerCase)
@@ -1854,7 +1928,8 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
// a '.'.
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- unsigned RegNum = matchVectorRegName(Head);
+ unsigned RegNum = matchRegisterNameAlias(Head, true);
+
if (RegNum) {
if (Next != StringRef::npos) {
Kind = Name.slice(Next, StringRef::npos);
@@ -2183,8 +2258,11 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
return TokError("invalid condition code");
Parser.Lex(); // Eat identifier token.
- if (invertCondCode)
+ if (invertCondCode) {
+ if (CC == AArch64CC::AL || CC == AArch64CC::NV)
+ return TokError("condition codes AL and NV are invalid for this instruction");
CC = AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC));
+ }
Operands.push_back(
AArch64Operand::CreateCondCode(CC, S, getLoc(), getContext()));
@@ -2849,7 +2927,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
- unsigned RegNum = MatchRegisterName(Tok.getString().lower());
+ unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), false);
MCContext &Ctx = getContext();
const MCRegisterInfo *RI = Ctx.getRegisterInfo();
@@ -3000,6 +3078,43 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E, getContext()));
return false;
}
+ case AsmToken::Equal: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
+ return Error(Loc, "unexpected token in operand");
+ Parser.Lex(); // Eat '='
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return true;
+
+ MCContext& Ctx = getContext();
+ E = SMLoc::getFromPointer(Loc.getPointer() - 1);
+ // If the op is an imm and can be fit into a mov, then replace ldr with mov.
+ if (isa<MCConstantExpr>(SubExprVal) && Operands.size() >= 2 &&
+ static_cast<AArch64Operand &>(*Operands[1]).isReg()) {
+ bool IsXReg = AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Operands[1]->getReg());
+ uint64_t Imm = (cast<MCConstantExpr>(SubExprVal))->getValue();
+ uint32_t ShiftAmt = 0, MaxShiftAmt = IsXReg ? 48 : 16;
+ while(Imm > 0xFFFF && countTrailingZeros(Imm) >= 16) {
+ ShiftAmt += 16;
+ Imm >>= 16;
+ }
+ if (ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF) {
+ Operands[0] = AArch64Operand::CreateToken("movz", false, Loc, Ctx);
+ Operands.push_back(AArch64Operand::CreateImm(
+ MCConstantExpr::Create(Imm, Ctx), S, E, Ctx));
+ if (ShiftAmt)
+ Operands.push_back(AArch64Operand::CreateShiftExtend(AArch64_AM::LSL,
+ ShiftAmt, true, S, E, Ctx));
+ return false;
+ }
+ }
+ // If it is a label or an imm that cannot fit in a movz, put it into CP.
+ const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal);
+ Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx));
+ return false;
+ }
}
}
@@ -3029,6 +3144,15 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
.Case("bnv", "b.nv")
.Default(Name);
+ // First check for the AArch64-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the 'instruction."
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
@@ -3443,8 +3567,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
case Match_MnemonicFail:
return Error(Loc, "unrecognized instruction mnemonic");
default:
- assert(0 && "unexpected error code!");
- return Error(Loc, "invalid instruction format");
+ llvm_unreachable("unexpected error code!");
}
}
@@ -3456,23 +3579,23 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
unsigned &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[0]);
- assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
- StringRef Tok = Op->getToken();
+ StringRef Tok = Op.getToken();
unsigned NumOperands = Operands.size();
if (NumOperands == 4 && Tok == "lsl") {
- AArch64Operand *Op2 = static_cast<AArch64Operand *>(Operands[2]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- if (Op2->isReg() && Op3->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+ AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ if (Op2.isReg() && Op3.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
if (Op3CE) {
uint64_t Op3Val = Op3CE->getValue();
uint64_t NewOp3Val = 0;
uint64_t NewOp4Val = 0;
if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op2->getReg())) {
+ Op2.getReg())) {
NewOp3Val = (32 - Op3Val) & 0x1f;
NewOp4Val = 31 - Op3Val;
} else {
@@ -3484,26 +3607,24 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext());
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
- Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
- Op3->getEndLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
Operands.push_back(AArch64Operand::CreateImm(
- NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext()));
- delete Op3;
- delete Op;
+ NewOp4, Op3.getStartLoc(), Op3.getEndLoc(), getContext()));
+ Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3.getStartLoc(),
+ Op3.getEndLoc(), getContext());
}
}
} else if (NumOperands == 5) {
// FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
// UBFIZ -> UBFM aliases.
if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") {
- AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+ if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
if (Op3CE && Op4CE) {
uint64_t Op3Val = Op3CE->getValue();
@@ -3511,21 +3632,21 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t RegWidth = 0;
if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
RegWidth = 64;
else
RegWidth = 32;
if (Op3Val >= RegWidth)
- return Error(Op3->getStartLoc(),
+ return Error(Op3.getStartLoc(),
"expected integer in range [0, 31]");
if (Op4Val < 1 || Op4Val > RegWidth)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"expected integer in range [1, 32]");
uint64_t NewOp3Val = 0;
if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
NewOp3Val = (32 - Op3Val) & 0x1f;
else
NewOp3Val = (64 - Op3Val) & 0x3f;
@@ -3533,7 +3654,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t NewOp4Val = Op4Val - 1;
if (NewOp3Val != 0 && NewOp4Val >= NewOp3Val)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"requested insert overflows register");
const MCExpr *NewOp3 =
@@ -3541,24 +3662,20 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const MCExpr *NewOp4 =
MCConstantExpr::Create(NewOp4Val, getContext());
Operands[3] = AArch64Operand::CreateImm(
- NewOp3, Op3->getStartLoc(), Op3->getEndLoc(), getContext());
+ NewOp3, Op3.getStartLoc(), Op3.getEndLoc(), getContext());
Operands[4] = AArch64Operand::CreateImm(
- NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext());
if (Tok == "bfi")
Operands[0] = AArch64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
+ "bfm", false, Op.getStartLoc(), getContext());
else if (Tok == "sbfiz")
Operands[0] = AArch64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
+ "sbfm", false, Op.getStartLoc(), getContext());
else if (Tok == "ubfiz")
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
else
llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op3;
- delete Op4;
}
}
@@ -3566,13 +3683,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// UBFX -> UBFM aliases.
} else if (NumOperands == 5 &&
(Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) {
- AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+ if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
if (Op3CE && Op4CE) {
uint64_t Op3Val = Op3CE->getValue();
@@ -3580,42 +3697,39 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t RegWidth = 0;
if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
RegWidth = 64;
else
RegWidth = 32;
if (Op3Val >= RegWidth)
- return Error(Op3->getStartLoc(),
+ return Error(Op3.getStartLoc(),
"expected integer in range [0, 31]");
if (Op4Val < 1 || Op4Val > RegWidth)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"expected integer in range [1, 32]");
uint64_t NewOp4Val = Op3Val + Op4Val - 1;
if (NewOp4Val >= RegWidth || NewOp4Val < Op3Val)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"requested extract overflows register");
const MCExpr *NewOp4 =
MCConstantExpr::Create(NewOp4Val, getContext());
Operands[4] = AArch64Operand::CreateImm(
- NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext());
if (Tok == "bfxil")
Operands[0] = AArch64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
+ "bfm", false, Op.getStartLoc(), getContext());
else if (Tok == "sbfx")
Operands[0] = AArch64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
+ "sbfm", false, Op.getStartLoc(), getContext());
else if (Tok == "ubfx")
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
else
llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op4;
}
}
}
@@ -3626,63 +3740,58 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
+ if (Op.isReg()) {
+ unsigned Reg = getXRegFromWReg(Op.getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
// FIXME: Likewise for sxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg() &&
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op->getReg())) {
+ Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
+ if (Op.isReg()) {
+ unsigned Reg = getXRegFromWReg(Op.getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
}
// FIXME: Likewise for uxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg() &&
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op->getReg())) {
+ Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR32. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg()) {
- unsigned Reg = getWRegFromXReg(Op->getReg());
- Operands[1] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg()) {
+ unsigned Reg = getWRegFromXReg(Op.getReg());
+ Operands[1] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
}
// Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR.
if (NumOperands == 3 && Tok == "fmov") {
- AArch64Operand *RegOp = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *ImmOp = static_cast<AArch64Operand *>(Operands[2]);
- if (RegOp->isReg() && ImmOp->isFPImm() &&
- ImmOp->getFPImm() == (unsigned)-1) {
+ AArch64Operand &RegOp = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]);
+ if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) {
unsigned zreg =
AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains(
- RegOp->getReg())
+ RegOp.getReg())
? AArch64::WZR
: AArch64::XZR;
- Operands[2] = AArch64Operand::CreateReg(zreg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete ImmOp;
+ Operands[2] = AArch64Operand::CreateReg(zreg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
@@ -3735,14 +3844,14 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
// If the match failed on a suffix token operand, tweak the diagnostic
// accordingly.
- if (((AArch64Operand *)Operands[ErrorInfo])->isToken() &&
- ((AArch64Operand *)Operands[ErrorInfo])->isTokenSuffix())
+ if (((AArch64Operand &)*Operands[ErrorInfo]).isToken() &&
+ ((AArch64Operand &)*Operands[ErrorInfo]).isTokenSuffix())
MatchResult = Match_InvalidSuffix;
return showMatchError(ErrorLoc, MatchResult);
@@ -3794,9 +3903,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidLabel:
case Match_MSR:
case Match_MRS: {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
// Any time we get here, there's nothing fancy to do. Just get the
// operand SMLoc and display the diagnostic.
- SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ SMLoc ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
return showMatchError(ErrorLoc, MatchResult);
@@ -3819,6 +3930,10 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(8, Loc);
if (IDVal == ".tlsdesccall")
return parseDirectiveTLSDescCall(Loc);
+ if (IDVal == ".ltorg" || IDVal == ".pool")
+ return parseDirectiveLtorg(Loc);
+ if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
return parseDirectiveLOH(IDVal, Loc);
}
@@ -3920,6 +4035,66 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
return false;
}
+/// parseDirectiveLtorg
+/// ::= .ltorg | .pool
+bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
+ getTargetStreamer().emitCurrentConstantPool();
+ return false;
+}
+
+/// parseDirectiveReq
+/// ::= name .req registername
+bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ SMLoc SRegLoc = getLoc();
+ unsigned RegNum = tryParseRegister();
+ bool IsVector = false;
+
+ if (RegNum == static_cast<unsigned>(-1)) {
+ StringRef Kind;
+ RegNum = tryMatchVectorRegister(Kind, false);
+ if (!Kind.empty()) {
+ Error(SRegLoc, "vector register without type specifier expected");
+ return false;
+ }
+ IsVector = true;
+ }
+
+ if (RegNum == static_cast<unsigned>(-1)) {
+ Parser.eatToEndOfStatement();
+ Error(SRegLoc, "register name or alias expected");
+ return false;
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Error(Parser.getTok().getLoc(), "unexpected input in .req directive");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ auto pair = std::make_pair(IsVector, RegNum);
+ if (RegisterReqs.GetOrCreateValue(Name, pair).getValue() != pair)
+ Warning(L, "ignoring redefinition of register alias '" + Name + "'");
+
+ return true;
+}
+
+/// parseDirectiveUneq
+/// ::= .unreq registername
+bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected input in .unreq directive.");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
@@ -3986,9 +4161,9 @@ extern "C" void LLVMInitializeAArch64AsmParser() {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(AsmOp);
+ AArch64Operand &Op = static_cast<AArch64Operand &>(AsmOp);
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
// immediate in the syntax.
@@ -4036,9 +4211,9 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
ExpectedVal = 8;
break;
}
- if (!Op->isImm())
+ if (!Op.isImm())
return Match_InvalidOperand;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
if (!CE)
return Match_InvalidOperand;
if (CE->getValue() == ExpectedVal)
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 2466368..2057c51 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -37,8 +37,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
case LLVMDisassembler_VariantKind_ARM64_TLVP:
case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
default:
- assert(0 && "bad LLVMDisassembler_VariantKind");
- return MCSymbolRefExpr::VK_None;
+ llvm_unreachable("bad LLVMDisassembler_VariantKind");
}
}
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
index be4ccad..d64c05b 100644
--- a/lib/Target/AArch64/Disassembler/CMakeLists.txt
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -4,11 +4,5 @@ add_llvm_library(LLVMAArch64Disassembler
AArch64Disassembler.cpp
AArch64ExternalSymbolizer.cpp
)
-# workaround for hanging compilation on MSVC8, 9 and 10
-#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
-#set_property(
-# SOURCE ARMDisassembler.cpp
-# PROPERTY COMPILE_FLAGS "/Od"
-# )
-#endif()
+
add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index f484a5b..8a21f06 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -918,7 +918,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
else
O << getRegisterName(Reg);
} else
- assert(0 && "unknown operand kind in printPostIncOperand64");
+ llvm_unreachable("unknown operand kind in printPostIncOperand64");
}
void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
@@ -1109,7 +1109,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
while (Stride--) {
switch (Reg) {
default:
- assert(0 && "Vector register expected!");
+ llvm_unreachable("Vector register expected!");
case AArch64::Q0: Reg = AArch64::Q1; break;
case AArch64::Q1: Reg = AArch64::Q2; break;
case AArch64::Q2: Reg = AArch64::Q3; break;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index d8900d4..a917616 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -86,7 +86,7 @@ public:
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
default:
- assert(0 && "Unknown fixup kind!");
+ llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_tlsdesc_call:
return 0;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index dc4a8bf..1763b40 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -96,4 +96,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
ExceptionsType = ExceptionHandling::DwarfCFI;
UseIntegratedAssembler = true;
+
+ HasIdentDirective = true;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 464a18c..f051357 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -218,13 +218,9 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
const MCSubtargetInfo &STI) const {
if (MO.isReg())
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
- else {
- assert(MO.isImm() && "did not expect relocated expression");
- return static_cast<unsigned>(MO.getImm());
- }
- assert(0 && "Unable to encode MCOperand!");
- return 0;
+ assert(MO.isImm() && "did not expect relocated expression");
+ return static_cast<unsigned>(MO.getImm());
}
template<unsigned FixupKind> uint32_t
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 85c3ec7..42a6787 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -81,37 +81,8 @@ void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
OS << *Expr;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-// FIXME: really do above: now that two backends are using it.
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
+void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
const MCSection *AArch64MCExpr::FindAssociatedSection() const {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index e869ed0..5422f9d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -147,7 +147,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 5c86189..ba95366 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -75,7 +75,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
Log2Size = llvm::Log2_32(4);
switch (Sym->getKind()) {
default:
- assert(0 && "Unexpected symbol reference variant kind!");
+ llvm_unreachable("Unexpected symbol reference variant kind!");
case MCSymbolRefExpr::VK_PAGEOFF:
RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12);
return true;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
new file mode 100644
index 0000000..f9aeb35
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -0,0 +1,40 @@
+//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class --*- C++ -*---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64TargetStreamer class.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+//
+// AArch64TargetStreamer Implemenation
+//
+AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
+
+AArch64TargetStreamer::~AArch64TargetStreamer() {}
+
+// The constant pool handling is shared by all AArch64TargetStreamer
+// implementations.
+const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr) {
+ return ConstantPools->addEntry(Streamer, Expr);
+}
+
+void AArch64TargetStreamer::emitCurrentConstantPool() {
+ ConstantPools->emitForCurrentSection(Streamer);
+}
+
+// finish() - write out any non-empty assembler constant pools.
+void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
diff --git a/lib/Target/AArch64/MCTargetDesc/Android.mk b/lib/Target/AArch64/MCTargetDesc/Android.mk
index e9d2323..a23c0e5 100644
--- a/lib/Target/AArch64/MCTargetDesc/Android.mk
+++ b/lib/Target/AArch64/MCTargetDesc/Android.mk
@@ -14,7 +14,8 @@ aarch64_mc_desc_SRC_FILES := \
AArch64MCAsmInfo.cpp \
AArch64MCCodeEmitter.cpp \
AArch64MCExpr.cpp \
- AArch64MCTargetDesc.cpp
+ AArch64MCTargetDesc.cpp \
+ AArch64TargetStreamer.cpp
# For the host
# =====================================================
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
index 7d5bced..6d8be5e 100644
--- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMAArch64Desc
AArch64MCExpr.cpp
AArch64MCTargetDesc.cpp
AArch64MachObjectWriter.cpp
+ AArch64TargetStreamer.cpp
)
add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 9e4c389..9d2ce21 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -233,23 +233,9 @@ inline static const char *getCondCodeName(CondCode Code) {
}
inline static CondCode getInvertedCondCode(CondCode Code) {
- switch (Code) {
- default: llvm_unreachable("Unknown condition code");
- case EQ: return NE;
- case NE: return EQ;
- case HS: return LO;
- case LO: return HS;
- case MI: return PL;
- case PL: return MI;
- case VS: return VC;
- case VC: return VS;
- case HI: return LS;
- case LS: return HI;
- case GE: return LT;
- case LT: return GE;
- case GT: return LE;
- case LE: return GT;
- }
+ // To reverse a condition it's necessary to only invert the low bit:
+
+ return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1);
}
/// Given a condition code, return NZCV flags that would satisfy that condition.