Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp              | 55
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp               | 53
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td                   | 43
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp        | 83
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp  | 16
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 11
6 files changed, 111 insertions, 150 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index cca6d12..572617c 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
   return TopOfFrameOffset - FrameRegPos;
 }
 
-/// Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF) {
-  // FIXME: Make generic? Really consider after upstreaming. This code is now
-  // shared between PEI, ARM *and* here.
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
-  unsigned MaxAlign = MFI->getMaxAlignment();
-  int Offset = 0;
-
-  // This code is very, very similar to PEI::calculateFrameObjectOffsets().
-  // It really should be refactored to share code. Until then, changes
-  // should keep in mind that there's tight coupling between the two.
-
-  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
-    int FixedOff = -MFI->getObjectOffset(i);
-    if (FixedOff > Offset) Offset = FixedOff;
-  }
-  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
-    if (MFI->isDeadObjectIndex(i))
-      continue;
-    Offset += MFI->getObjectSize(i);
-    unsigned Align = MFI->getObjectAlignment(i);
-    // Adjust to alignment boundary
-    Offset = (Offset+Align-1)/Align*Align;
-
-    MaxAlign = std::max(Align, MaxAlign);
-  }
-
-  if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
-    Offset += MFI->getMaxCallFrameSize();
-
-  // Round up the size to a multiple of the alignment. If the function has
-  // any calls or alloca's, align to the target's StackAlignment value to
-  // ensure that the callee's frame or the alloca data is suitably aligned;
-  // otherwise, for leaf functions, align to the TransientStackAlignment
-  // value.
-  unsigned StackAlign;
-  if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
-      (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
-    StackAlign = TFI->getStackAlignment();
-  else
-    StackAlign = TFI->getTransientStackAlignment();
-
-  // If the frame pointer is eliminated, all frame offsets will be relative to
-  // SP not FP. Align to MaxAlign so this works.
-  StackAlign = std::max(StackAlign, MaxAlign);
-  unsigned AlignMask = StackAlign - 1;
-  Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
-  return (unsigned)Offset;
-}
-
 void
 AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                            RegScavenger *RS) const {
@@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   // callee-save register for this purpose or allocate an extra spill slot.
 
   bool BigStack =
-    (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+    (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
     || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
     || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
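The body of the deleted helper is the same frame-layout arithmetic that now lives in the generic MachineFrameInfo::estimateStackSize() the new code calls, and its two rounding idioms are worth spelling out. A minimal sketch (hypothetical standalone helpers for illustration, not LLVM API; the mask form assumes a power-of-two alignment):

    #include <cassert>
    #include <cstdint>

    // Division form, as in the per-object loop: valid for any Align >= 1.
    uint64_t alignUpDiv(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    // Mask form, as in the final rounding: needs Align to be a power of two.
    uint64_t alignUpMask(uint64_t Offset, uint64_t Align) {
      assert((Align & (Align - 1)) == 0 && "Align must be a power of two");
      uint64_t AlignMask = Align - 1;
      return (Offset + AlignMask) & ~AlignMask;
    }

Both round Offset up to the next multiple of Align; the mask form is the one used for the final StackAlign rounding, where the alignment is guaranteed to be a power of two.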
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index cea7f91..e9f4497 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -200,6 +200,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
   setOperationAction(ISD::FSIN, MVT::f32, Expand);
   setOperationAction(ISD::FSIN, MVT::f64, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
 
   // Virtually no operation on f128 is legal, but LLVM can't expand them when
   // there's a valid register class, so we need custom operations in most cases.
@@ -217,6 +219,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
   setOperationAction(ISD::FREM, MVT::f128, Expand);
   setOperationAction(ISD::FRINT, MVT::f128, Expand);
   setOperationAction(ISD::FSIN, MVT::f128, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
   setOperationAction(ISD::FSQRT, MVT::f128, Expand);
   setOperationAction(ISD::FSUB, MVT::f128, Custom);
   setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
@@ -341,8 +344,7 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   //   ldxr dest, ptr
   //   <binop> scratch, dest, incr
   //   stxr stxr_status, scratch, ptr
-  //   cmp stxr_status, #0
-  //   b.ne loopMBB
+  //   cbnz stxr_status, loopMBB
   // fallthrough --> exitMBB
   BB = loopMBB;
   BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
@@ -364,10 +366,8 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
 
   BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
-  BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
-    .addReg(stxr_status).addImm(0);
-  BuildMI(BB, dl, TII->get(AArch64::Bcc))
-    .addImm(A64CC::NE).addMBB(loopMBB);
+  BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+    .addReg(stxr_status).addMBB(loopMBB);
 
   BB->addSuccessor(loopMBB);
   BB->addSuccessor(exitMBB);
@@ -437,8 +437,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   //   cmp incr, dest (, sign extend if necessary)
   //   csel scratch, dest, incr, cond
   //   stxr stxr_status, scratch, ptr
-  //   cmp stxr_status, #0
-  //   b.ne loopMBB
+  //   cbnz stxr_status, loopMBB
   // fallthrough --> exitMBB
   BB = loopMBB;
   BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
@@ -457,10 +456,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
 
   BuildMI(BB, dl, TII->get(strOpc), stxr_status)
     .addReg(scratch).addReg(ptr);
-  BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
-    .addReg(stxr_status).addImm(0);
-  BuildMI(BB, dl, TII->get(AArch64::Bcc))
-    .addImm(A64CC::NE).addMBB(loopMBB);
+  BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+    .addReg(stxr_status).addMBB(loopMBB);
 
   BB->addSuccessor(loopMBB);
   BB->addSuccessor(exitMBB);
@@ -533,17 +530,14 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   // loop2MBB:
   //   strex stxr_status, newval, [ptr]
-  //   cmp stxr_status, #0
-  //   b.ne loop1MBB
+  //   cbnz stxr_status, loop1MBB
   BB = loop2MBB;
   unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
   MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
 
   BuildMI(BB, dl, TII->get(strOpc),
           stxr_status).addReg(newval).addReg(ptr);
-  BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
-    .addReg(stxr_status).addImm(0);
-  BuildMI(BB, dl, TII->get(AArch64::Bcc))
-    .addImm(A64CC::NE).addMBB(loop1MBB);
+  BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+    .addReg(stxr_status).addMBB(loop1MBB);
 
   BB->addSuccessor(loop1MBB);
   BB->addSuccessor(exitMBB);
@@ -1861,11 +1855,10 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
   const GlobalValue *GV = GN->getGlobal();
   unsigned Alignment = GV->getAlignment();
   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
-
-  if (GV->isWeakForLinker() && RelocM == Reloc::Static) {
-    // Weak symbols can't use ADRP/ADD pair since they should evaluate to
-    // zero when undefined. In PIC mode the GOT can take care of this, but in
-    // absolute mode we use a constant pool load.
+  if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
+    // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
+    // to zero when they remain undefined. In PIC mode the GOT can take care of
+    // this, but in absolute mode we use a constant pool load.
     SDValue PoolAddr;
     PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
                            DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
@@ -1873,10 +1866,16 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
                            DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
                                                      AArch64II::MO_LO12),
                            DAG.getConstant(8, MVT::i32));
-    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
-                       MachinePointerInfo::getConstantPool(),
-                       /*isVolatile=*/ false, /*isNonTemporal=*/ true,
-                       /*isInvariant=*/ true, 8);
+    SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
+                                     MachinePointerInfo::getConstantPool(),
+                                     /*isVolatile=*/ false,
+                                     /*isNonTemporal=*/ true,
+                                     /*isInvariant=*/ true, 8);
+
+    if (GN->getOffset() != 0)
+      return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+                         DAG.getConstant(GN->getOffset(), PtrVT));
+
+    return GlobalAddr;
   }
 
   if (Alignment == 0) {
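The cmp/b.ne to cbnz rewrite in the three expansions above works because STXR writes 0 to its status register on success and 1 on failure, so "retry while non-zero" is exactly the test CBNZ performs and the separate compare (with its NZCV clobber) can go. A rough C-level sketch of the loop shape the custom inserter builds for ATOMIC_LOAD_ADD, written with Clang's __builtin_arm_ldrex/__builtin_arm_strex exclusive-access builtins (an illustration of the pattern under that assumption, not the code the backend actually emits):

    // Compile for an AArch64 target, where the builtins map to LDXR/STXR.
    int atomic_fetch_add_sketch(int *Ptr, int Incr) {
      int Old, Status;
      do {
        Old = __builtin_arm_ldrex(Ptr);                // ldxr  dest, [ptr]
        Status = __builtin_arm_strex(Old + Incr, Ptr); // stxr  status, scratch, [ptr]
      } while (Status != 0);                           // cbnz  status, loopMBB
      return Old;
    }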
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 562a7f6..319ec97 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -159,7 +159,7 @@ let Defs = [XSP], Uses = [XSP] in {
 // Atomic operation pseudo-instructions
 //===----------------------------------------------------------------------===//
 
-let usesCustomInserter = 1, Defs = [NZCV] in {
+let usesCustomInserter = 1 in {
 multiclass AtomicSizes<string opname> {
   def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
             [(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>;
@@ -178,11 +178,14 @@ defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
 defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
 defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
 defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
-defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
-defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
-defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
-defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
 defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+let Defs = [NZCV] in {
+  // These operations need a CMP to calculate the correct value
+  defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
+  defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
+  defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
+  defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+}
 
 let usesCustomInserter = 1, Defs = [NZCV] in {
 def ATOMIC_CMP_SWAP_I8
@@ -1942,43 +1945,41 @@ def fpz32 : Operand<f32>,
             ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
   let ParserMatchClass = fpzero_asmoperand;
   let PrintMethod = "printFPZeroOperand";
+  let DecoderMethod = "DecodeFPZeroOperand";
 }
 
 def fpz64 : Operand<f64>,
             ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
   let ParserMatchClass = fpzero_asmoperand;
   let PrintMethod = "printFPZeroOperand";
+  let DecoderMethod = "DecodeFPZeroOperand";
 }
 
-multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, string asmop2,
-                            dag pattern> {
+multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
   def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
-                          (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2),
-                          [pattern], NoItinerary> {
+                          (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
+                          NoItinerary> {
     let Defs = [NZCV];
   }
 
   def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
-                        (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2),
-                        [], NoItinerary> {
+                        (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> {
     let Defs = [NZCV];
   }
 }
 
-defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm",
+defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
                                (set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>;
-defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm",
+defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
                                (set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>;
 
-// What would be Rm should be written as 0, but anything is valid for
-// disassembly so we can't set the bits
-let PostEncoderMethod = "fixFCMPImm" in {
-  defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm",
-                                 (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>;
+// What would be Rm should be written as 0; note that even though it's called
+// "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
+defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
+                               (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Rm))>;
 
-  defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm",
-                                 (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>;
-}
+defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
+                               (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Rm))>;
 
 //===----------------------------------------------------------------------===//
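Two things change in this file: the FCMP zero-operand forms in the hunk just above, and, in the atomic hunk before it, the NZCV clobber moving off the plain binops. Only the min/max flavours keep Defs = [NZCV], because their expansion contains a real compare (cmp + csel) to choose between the loaded value and the operand; with the loop back-edge now a CBNZ, the other operations never touch the flags. In the same hedged builtin style as the earlier sketch:

    // Sketch of the min expansion's shape; the select is the NZCV user.
    int atomic_fetch_min_sketch(int *Ptr, int Incr) {
      int Old, Status;
      do {
        Old = __builtin_arm_ldrex(Ptr);          // ldxr dest, [ptr]
        int Scratch = Old < Incr ? Old : Incr;   // cmp + csel on AArch64
        Status = __builtin_arm_strex(Scratch, Ptr);
      } while (Status != 0);                     // cbnz status, loopMBB
      return Old;
    }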
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c1695da..69bb80a 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -160,44 +160,53 @@ private:
   SMLoc StartLoc, EndLoc;
 
+  struct ImmWithLSLOp {
+    const MCExpr *Val;
+    unsigned ShiftAmount;
+    bool ImplicitAmount;
+  };
+
+  struct CondCodeOp {
+    A64CC::CondCodes Code;
+  };
+
+  struct FPImmOp {
+    double Val;
+  };
+
+  struct ImmOp {
+    const MCExpr *Val;
+  };
+
+  struct RegOp {
+    unsigned RegNum;
+  };
+
+  struct ShiftExtendOp {
+    A64SE::ShiftExtSpecifiers ShiftType;
+    unsigned Amount;
+    bool ImplicitAmount;
+  };
+
+  struct SysRegOp {
+    const char *Data;
+    unsigned Length;
+  };
+
+  struct TokOp {
+    const char *Data;
+    unsigned Length;
+  };
+
   union {
-    struct {
-      const MCExpr *Val;
-      unsigned ShiftAmount;
-      bool ImplicitAmount;
-    } ImmWithLSL;
-
-    struct {
-      A64CC::CondCodes Code;
-    } CondCode;
-
-    struct {
-      double Val;
-    } FPImm;
-
-    struct {
-      const MCExpr *Val;
-    } Imm;
-
-    struct {
-      unsigned RegNum;
-    } Reg;
-
-    struct {
-      A64SE::ShiftExtSpecifiers ShiftType;
-      unsigned Amount;
-      bool ImplicitAmount;
-    } ShiftExtend;
-
-    struct {
-      const char *Data;
-      unsigned Length;
-    } SysReg;
-
-    struct {
-      const char *Data;
-      unsigned Length;
-    } Tok;
+    struct ImmWithLSLOp ImmWithLSL;
+    struct CondCodeOp CondCode;
+    struct FPImmOp FPImm;
+    struct ImmOp Imm;
+    struct RegOp Reg;
+    struct ShiftExtendOp ShiftExtend;
+    struct SysRegOp SysReg;
+    struct TokOp Tok;
   };
 
   AArch64Operand(KindTy K, SMLoc S, SMLoc E)
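Hoisting the operand structs out of the union and naming them leaves the layout and accessors untouched; each union arm simply gets a named type instead of being defined anonymously in place. A stripped-down sketch of the same tagged-union pattern (a hypothetical Operand type for illustration, not the parser's real interface):

    #include <cassert>

    struct Operand {
      enum KindTy { k_Immediate, k_Register } Kind;

      struct ImmOp { long Val; };
      struct RegOp { unsigned RegNum; };

      union {        // only the arm selected by Kind is meaningful
        ImmOp Imm;
        RegOp Reg;
      };

      unsigned getReg() const {
        assert(Kind == k_Register && "accessor used on wrong operand kind");
        return Reg.RegNum;
      }
    };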
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index eba7666..12c1b8f 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -106,6 +106,11 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
                                                uint64_t Address,
                                                const void *Decoder);
 
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+                                        unsigned RmBits,
+                                        uint64_t Address,
+                                        const void *Decoder);
+
 template<int RegWidth>
 static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
                                              unsigned FullImm,
@@ -381,6 +386,17 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+                                        unsigned RmBits,
+                                        uint64_t Address,
+                                        const void *Decoder) {
+  // Any bits are valid in the instruction (they're architecturally ignored),
+  // but a code generator should insert 0.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  return MCDisassembler::Success;
+}
+
+
 template<int RegWidth>
 static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 756e037..a5c591e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -106,8 +106,6 @@ public:
   void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                          SmallVectorImpl<MCFixup> &Fixups) const;
 
-  unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const;
-
   template<int hasRs, int hasRt2>
   unsigned fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;
 
@@ -423,15 +421,6 @@ AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
   return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
 }
 
-unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI,
-                                          unsigned EncodedValue) const {
-  // For FCMP[E] Rn, #0.0, the Rm field has a canonical representation
-  // with 0s, but is architecturally ignored
-  EncodedValue &= ~0x1f0000u;
-
-  return EncodedValue;
-}
-
 template<int hasRs, int hasRt2>
 unsigned AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
                                                      unsigned EncodedValue) const {
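Taken together, the last two files replace an encode-side fixup with a decode-side normalisation: the fpz32/fpz64 operand now encodes as zeros in the Rm field (bits 20-16) without help, and DecodeFPZeroOperand accepts whatever the field holds, so the ad-hoc fixFCMPImm pass is dead. A small sketch of that field arithmetic (hypothetical helpers; the mask constant is the one the removed fixFCMPImm used):

    #include <cstdint>

    // Rm occupies bits [20:16] of the FCMP[E] encoding.
    const uint32_t RmFieldMask = 0x1f0000u;

    // Encode side: the #0.0 operand contributes all-zero Rm bits.
    uint32_t clearRmField(uint32_t Encoding) { return Encoding & ~RmFieldMask; }

    // Decode side: the field is architecturally ignored, so normalise to 0.
    uint32_t decodeRmAsZero(uint32_t /*Encoding*/) { return 0; }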