Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp               | 55
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp                | 53
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td                    | 43
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp         | 83
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp   | 16
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp  | 11
6 files changed, 111 insertions(+), 150 deletions(-)
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index cca6d12..572617c 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
return TopOfFrameOffset - FrameRegPos;
}
-/// Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF) {
- // FIXME: Make generic? Really consider after upstreaming. This code is now
- // shared between PEI, ARM *and* here.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
void
AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// callee-save register for this purpose or allocate an extra spill slot.
bool BigStack =
- (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
|| MFI->hasVarSizedObjects() // Access will be from X29: messes things up
|| (MFI->adjustsStack() && !hasReservedCallFrame(MF));
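The removed helper is not lost: as the `+` line shows, the caller now uses
MachineFrameInfo::estimateStackSize, the shared version the deleted FIXME was
asking for. The heart of the helper is the alignment round-up it applied per
object and to the final frame size; a minimal standalone sketch of the two
idioms (function names here are illustrative, not LLVM API):

    // Round Offset up to the next multiple of Align. The divide/multiply
    // form works for any Align, and is what the per-object loop used.
    unsigned alignTo(unsigned Offset, unsigned Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    // For the final frame size, Align is a power of two, so the cheaper
    // mask form used at the end of the helper is equivalent:
    unsigned alignToPow2(unsigned Offset, unsigned Align) {
      unsigned AlignMask = Align - 1;
      return (Offset + AlignMask) & ~AlignMask;
    }

For example, both give 16 for Offset = 13, Align = 8.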
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index cea7f91..e9f4497 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -200,6 +200,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
@@ -217,6 +219,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, Custom);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
@@ -341,8 +344,7 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// ldxr dest, ptr
// <binop> scratch, dest, incr
// stxr stxr_status, scratch, ptr
- // cmp stxr_status, #0
- // b.ne loopMBB
+ // cbnz stxr_status, loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
@@ -364,10 +366,8 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
- BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
- .addReg(stxr_status).addImm(0);
- BuildMI(BB, dl, TII->get(AArch64::Bcc))
- .addImm(A64CC::NE).addMBB(loopMBB);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
@@ -437,8 +437,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
// cmp incr, dest (, sign extend if necessary)
// csel scratch, dest, incr, cond
// stxr stxr_status, scratch, ptr
- // cmp stxr_status, #0
- // b.ne loopMBB
+ // cbnz stxr_status, loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
@@ -457,10 +456,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
BuildMI(BB, dl, TII->get(strOpc), stxr_status)
.addReg(scratch).addReg(ptr);
- BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
- .addReg(stxr_status).addImm(0);
- BuildMI(BB, dl, TII->get(AArch64::Bcc))
- .addImm(A64CC::NE).addMBB(loopMBB);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
@@ -533,17 +530,14 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
// loop2MBB:
// strex stxr_status, newval, [ptr]
- // cmp stxr_status, #0
- // b.ne loop1MBB
+ // cbnz stxr_status, loop1MBB
BB = loop2MBB;
unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
- BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
- .addReg(stxr_status).addImm(0);
- BuildMI(BB, dl, TII->get(AArch64::Bcc))
- .addImm(A64CC::NE).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loop1MBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
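All three custom inserters above make the same substitution: the
store-exclusive status register is tested with a single CBNZ rather than a
CMP against #0 followed by B.NE. That saves one instruction per retry loop
and, more importantly, stops the loop from clobbering NZCV, which is what
lets the .td change below drop the NZCV Def from the plain atomic pseudos.
A rough C++ illustration of the loop shape being built, assuming typical
AArch64 codegen for compare_exchange_weak:

    #include <atomic>
    #include <cstdint>

    // Retry until the exclusive store succeeds; compare_exchange_weak
    // lowers to the same ldxr / <op> / stxr / cbnz pattern the custom
    // inserter now emits for the atomic pseudo-instructions.
    uint32_t fetch_add_llsc(std::atomic<uint32_t> &a, uint32_t incr) {
      uint32_t dest = a.load(std::memory_order_relaxed);
      while (!a.compare_exchange_weak(dest, dest + incr))
        ; // stxr_status != 0: the exclusive store failed, so loop
      return dest;
    }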
@@ -1861,11 +1855,10 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
const GlobalValue *GV = GN->getGlobal();
unsigned Alignment = GV->getAlignment();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
-
- if (GV->isWeakForLinker() && RelocM == Reloc::Static) {
- // Weak symbols can't use ADRP/ADD pair since they should evaluate to
- // zero when undefined. In PIC mode the GOT can take care of this, but in
- // absolute mode we use a constant pool load.
+ if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
+ // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
+ // to zero when they remain undefined. In PIC mode the GOT can take care of
+ // this, but in absolute mode we use a constant pool load.
SDValue PoolAddr;
PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
@@ -1873,10 +1866,16 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
AArch64II::MO_LO12),
DAG.getConstant(8, MVT::i32));
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
- MachinePointerInfo::getConstantPool(),
- /*isVolatile=*/ false, /*isNonTemporal=*/ true,
- /*isInvariant=*/ true, 8);
+ SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /*isVolatile=*/ false,
+ /*isNonTemporal=*/ true,
+ /*isInvariant=*/ true, 8);
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalAddr;
}
if (Alignment == 0) {
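The LowerGlobalAddressELF hunk makes two fixes. First, the constant-pool
path now applies only to weak symbols that are also declarations: a weak
symbol defined in the current module has a known nonzero address and can use
the normal ADRP/ADD pair. Second, a nonzero offset on the GlobalAddressSDNode
is no longer dropped; it is added back onto the loaded address. The case the
constant-pool path exists for, as a small sketch (the attribute is GCC/Clang
syntax):

    // A weak declaration must compare equal to null when no definition is
    // linked in. An ADRP/ADD pair can't materialize address 0 here, so in
    // static relocation mode the address is loaded from a constant pool.
    extern int weak_var __attribute__((weak));

    bool have_weak_var() { return &weak_var != nullptr; }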
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 562a7f6..319ec97 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -159,7 +159,7 @@ let Defs = [XSP], Uses = [XSP] in {
// Atomic operation pseudo-instructions
//===----------------------------------------------------------------------===//
-let usesCustomInserter = 1, Defs = [NZCV] in {
+let usesCustomInserter = 1 in {
multiclass AtomicSizes<string opname> {
def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
[(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>;
@@ -178,11 +178,14 @@ defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
-defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
-defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
-defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
-defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+let Defs = [NZCV] in {
+ // These operations need a CMP to calculate the correct value
+ defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
+ defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
+ defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
+ defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+}
let usesCustomInserter = 1, Defs = [NZCV] in {
def ATOMIC_CMP_SWAP_I8
@@ -1942,43 +1945,41 @@ def fpz32 : Operand<f32>,
ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
let ParserMatchClass = fpzero_asmoperand;
let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
}
def fpz64 : Operand<f64>,
ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
let ParserMatchClass = fpzero_asmoperand;
let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
}
-multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, string asmop2,
- dag pattern> {
+multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
- (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2),
- [pattern], NoItinerary> {
+ (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
+ NoItinerary> {
let Defs = [NZCV];
}
def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
- (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2),
- [], NoItinerary> {
+ (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> {
let Defs = [NZCV];
}
}
-defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm",
+defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
(set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>;
-defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm",
+defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
(set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>;
-// What would be Rm should be written as 0, but anything is valid for
-// disassembly so we can't set the bits
-let PostEncoderMethod = "fixFCMPImm" in {
- defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm",
- (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>;
+// What would be Rm should be written as 0; note that even though it's called
+// "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
+defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
+ (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Rm))>;
- defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm",
- (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>;
-}
+defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
+ (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Rm))>;
//===----------------------------------------------------------------------===//
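On the .td side, the FCMP-with-zero forms now name their immediate operand
$Rm, so the shared asm string "fcmp\t$Rn, $Rm" covers both the register and
the zero variants, and fpz32/fpz64 gain a DecoderMethod so the disassembler
materializes the #0.0 operand itself; together these retire the
PostEncoderMethod workaround (removed from the MCCodeEmitter below). The zero
form is still only selected for a literal floating-point zero via
SelectFPZeroOperand; for instance (assuming ordinary codegen):

    // Comparing against a literal 0.0 matches the fpz32 pattern, so this
    // should select "fcmp s0, #0.0" instead of loading 0.0 into a register.
    bool is_negative(float x) { return x < 0.0f; }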
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c1695da..69bb80a 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -160,44 +160,53 @@ private:
SMLoc StartLoc, EndLoc;
+ struct ImmWithLSLOp {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ };
+
+ struct CondCodeOp {
+ A64CC::CondCodes Code;
+ };
+
+ struct FPImmOp {
+ double Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ShiftExtendOp {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ };
+
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
union {
- struct {
- const MCExpr *Val;
- unsigned ShiftAmount;
- bool ImplicitAmount;
- } ImmWithLSL;
-
- struct {
- A64CC::CondCodes Code;
- } CondCode;
-
- struct {
- double Val;
- } FPImm;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- A64SE::ShiftExtSpecifiers ShiftType;
- unsigned Amount;
- bool ImplicitAmount;
- } ShiftExtend;
-
- struct {
- const char *Data;
- unsigned Length;
- } SysReg;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
+ struct ImmWithLSLOp ImmWithLSL;
+ struct CondCodeOp CondCode;
+ struct FPImmOp FPImm;
+ struct ImmOp Imm;
+ struct RegOp Reg;
+ struct ShiftExtendOp ShiftExtend;
+ struct SysRegOp SysReg;
+ struct TokOp Tok;
};
AArch64Operand(KindTy K, SMLoc S, SMLoc E)
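The AsmParser hunk changes no behaviour: each operand variant gets a named
struct type, and the union then holds one named member per variant. Naming
the types lets them be referred to outside the union definition (in helper
signatures, casts and the like), which the previous unnamed member types did
not allow. The resulting shape, reduced to two variants with illustrative
names:

    #include <cstdint>

    struct Operand {
      enum KindTy { k_Immediate, k_Register } Kind;

      // Named variant types; previously these were unnamed structs
      // declared inline inside the union.
      struct ImmOp { int64_t Val; };
      struct RegOp { unsigned RegNum; };

      union {
        ImmOp Imm;
        RegOp Reg;
      };
    };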
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index eba7666..12c1b8f 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -106,6 +106,11 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder);
+
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
unsigned FullImm,
@@ -381,6 +386,17 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder) {
+ // Any bits are valid in the instruction (they're architecturally ignored),
+ // but a code generator should insert 0.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return MCDisassembler::Success;
+}
+
+
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 756e037..a5c591e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -106,8 +106,6 @@ public:
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
- unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const;
-
template<int hasRs, int hasRt2> unsigned
fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;
@@ -423,15 +421,6 @@ AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
}
-unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI,
- unsigned EncodedValue) const {
- // For FCMP[E] Rn, #0.0, the Rm field has a canonical representation
- // with 0s, but is architecturally ignored
- EncodedValue &= ~0x1f0000u;
-
- return EncodedValue;
-}
-
template<int hasRs, int hasRt2> unsigned
AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
unsigned EncodedValue) const {
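The deleted fixFCMPImm was the encoder half of the old FCMP #0.0 scheme:
because the .td description left the Rm bits unconstrained ("anything is
valid for disassembly"), a post-encoder hook had to force them to the
canonical zero. With fpz32/fpz64 now carrying an explicit zero operand that
the tablegen'd encoder emits and DecodeFPZeroOperand consumes, the hook is
redundant. For reference, a sketch of the field it cleared:

    #include <cstdint>

    // Rm occupies bits [20:16] of the FP compare encoding; the removed
    // hook masked the field to its canonical all-zero representation.
    uint32_t clearRmField(uint32_t EncodedValue) {
      return EncodedValue & ~UINT32_C(0x1f0000);
    }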