Diffstat (limited to 'lib/Target/AArch64')
42 files changed, 1780 insertions, 537 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 1ad5ac8..e6a27c3 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -60,6 +60,7 @@ def AArch64InstrInfo : InstrInfo; // AArch64 Processors supported. // include "AArch64SchedA53.td" +include "AArch64SchedA57.td" include "AArch64SchedCyclone.td" def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", @@ -89,7 +90,7 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, FeatureCRC]>; def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; -def : ProcessorModel<"cortex-a57", NoSchedModel, [ProcA57]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp index 04906f6..ab2c4b7 100644 --- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp +++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp @@ -214,8 +214,8 @@ AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { if (SExt->getType() != ConsideredSExtType) return false; - for (const Use &U : SExt->uses()) { - if (isa<GetElementPtrInst>(*U)) + for (const User *U : SExt->users()) { + if (isa<GetElementPtrInst>(U)) return true; } @@ -267,8 +267,7 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { } // Now try to get through the chain of definitions. - while (isa<Instruction>(SExt->getOperand(0))) { - Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0)); + while (auto *Inst = dyn_cast<Instruction>(SExt->getOperand(0))) { DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n'); if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) { // We cannot get through something that is not an Instruction @@ -285,10 +284,10 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { // assertion on the type as all involved sext operation may have not // been moved yet. while (!Inst->use_empty()) { - Value::use_iterator UseIt = Inst->use_begin(); - Instruction *UseInst = dyn_cast<Instruction>(*UseIt); - assert(UseInst && "Use of sext is not an Instruction!"); - UseInst->setOperand(UseIt->getOperandNo(), SExt); + Use &U = *Inst->use_begin(); + Instruction *User = dyn_cast<Instruction>(U.getUser()); + assert(User && "User of sext is not an Instruction!"); + User->setOperand(U.getOperandNo(), SExt); } ToRemove.insert(Inst); SExt->setOperand(0, Inst->getOperand(0)); @@ -385,11 +384,11 @@ void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, if (ToRemove.count(Inst)) continue; bool inserted = false; - for (auto Pt : CurPts) { + for (auto &Pt : CurPts) { if (DT.dominates(Inst, Pt)) { DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n" << *Inst << '\n'); - (Pt)->replaceAllUsesWith(Inst); + Pt->replaceAllUsesWith(Inst); ToRemove.insert(Pt); Pt = Inst; inserted = true; @@ -436,7 +435,7 @@ void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { bool insert = false; // #1. 
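The loop that follows makes the same uses()-to-users() migration as the earlier hunks in this file: Value::users() yields User* directly, with no iterator dereference. A minimal sketch of the idiom, assuming LLVM headers of this era (the helper name is illustrative, not from the patch):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // True if any user of V is a GEP -- the same question shouldConsiderSExt
    // answers above, written against the users() range.
    static bool feedsGEP(const Value *V) {
      for (const User *U : V->users()) // users() enumerates User*, no dereference
        if (isa<GetElementPtrInst>(U))
          return true;
      return false;
    }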
- for (const Use &U : SExt->uses()) { + for (const User *U : SExt->users()) { const Instruction *Inst = dyn_cast<GetElementPtrInst>(U); if (Inst && Inst->getNumOperands() > 2) { DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index c3ee9bb..cd94e24 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -211,7 +211,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { default: - assert(0 && "<unknown operand type>"); + llvm_unreachable("<unknown operand type>"); case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp index 5209452..484e7e8 100644 --- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp +++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp @@ -291,7 +291,7 @@ static bool isConditionalBranch(unsigned Opc) { static MachineBasicBlock *getDestBlock(MachineInstr *MI) { switch (MI->getOpcode()) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBZW: case AArch64::TBNZW: case AArch64::TBZX: @@ -309,7 +309,7 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) { static unsigned getOppositeConditionOpcode(unsigned Opc) { switch (Opc) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBNZW: return AArch64::TBZW; case AArch64::TBNZX: return AArch64::TBZX; case AArch64::TBZW: return AArch64::TBNZW; @@ -325,7 +325,7 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) { static unsigned getBranchDisplacementBits(unsigned Opc) { switch (Opc) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBNZW: case AArch64::TBZW: case AArch64::TBNZX: diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index ded2e17..8e8bd3d 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -18,9 +18,6 @@ class CCIfAlign<string Align, CCAction A> : class CCIfBigEndian<CCAction A> : CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>; -class CCIfUnallocated<string Reg, CCAction A> : - CCIf<"!State.isAllocated(AArch64::" # Reg # ")", A>; - //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// @@ -45,7 +42,7 @@ def CC_AArch64_AAPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>, + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -120,7 +117,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. 
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>, + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -143,8 +140,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8], CCAssignToStack<1, 1>>, - CCIfType<[i16], CCAssignToStack<2, 2>>, + CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, + CCIf<"ValVT == MVT::i16", CCAssignToStack<2, 2>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], CCAssignToStack<8, 8>>, @@ -172,12 +169,11 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ // 32bit quantity as undef. def CC_AArch64_WebKit_JS : CallingConv<[ // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). - CCIfType<[i1, i8, i16], CCIfUnallocated<"X0", CCPromoteToType<i32>>>, + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, // Pass the remaining arguments on the stack instead. - CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64], CCAssignToStack<8, 8>> ]>; diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index c3b5369..2164d77 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -240,21 +240,15 @@ unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) { } unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) { - // We can't handle thread-local variables quickly yet. Unfortunately we have - // to peer through any aliases to find out if that rule applies. - const GlobalValue *TLSGV = GV; - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - TLSGV = GA->getAliasee(); + // We can't handle thread-local variables quickly yet. + if (GV->isThreadLocal()) + return 0; // MachO still uses GOT for large code-model accesses, but ELF requires // movz/movk sequences, which FastISel doesn't handle yet. if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO()) return 0; - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV)) - if (GVar->isThreadLocal()) - return 0; - unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); EVT DestEVT = TLI.getValueType(GV->getType(), true); @@ -469,11 +463,18 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT, break; } - // FIXME: If this is a stack pointer and the offset needs to be simplified - // then put the alloca address into a register, set the base type back to - // register and continue. This should almost never happen. + // If this is a stack pointer and the offset needs to be simplified then put + // the alloca address into a register, set the base type back to register and + // continue. This should almost never happen.
if (needsLowering && Addr.getKind() == Address::FrameIndexBase) { - return false; + unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), + ResultReg) + .addFrameIndex(Addr.getFI()) + .addImm(0) + .addImm(0); + Addr.setKind(Address::RegBase); + Addr.setReg(ResultReg); } // Since the offset is too large for the load/store instruction get the @@ -1224,7 +1225,6 @@ bool AArch64FastISel::ProcessCallArgs( Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false); if (Arg == 0) return false; - ArgVT = DestVT; break; } case CCValAssign::AExt: @@ -1235,7 +1235,6 @@ bool AArch64FastISel::ProcessCallArgs( Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true); if (Arg == 0) return false; - ArgVT = DestVT; break; } default: @@ -1254,7 +1253,7 @@ bool AArch64FastISel::ProcessCallArgs( assert(VA.isMemLoc() && "Assuming store on stack."); // Need to store on the stack. - unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; unsigned BEAlign = 0; if (ArgSize < 8 && !Subtarget->isLittleEndian()) @@ -1468,10 +1467,12 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, bool RV; unsigned ResultReg; RV = EmitLoad(VT, ResultReg, Src); - assert(RV == true && "Should be able to handle this load."); + if (!RV) + return false; + RV = EmitStore(VT, ResultReg, Dest); - assert(RV == true && "Should be able to handle this store."); - (void)RV; + if (!RV) + return false; int64_t Size = VT.getSizeInBits() / 8; Len -= Size; @@ -1749,6 +1750,17 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); + + // FastISel does not have plumbing to deal with extensions where the SrcVT or + // DestVT are odd things, so test to make sure that they are both types we can + // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise + // bail out to SelectionDAG. + if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && + (DestVT != MVT::i32) && (DestVT != MVT::i64)) || + ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && + (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) + return 0; + unsigned Opc; unsigned Imm = 0; @@ -1895,6 +1907,7 @@ bool AArch64FastISel::SelectMul(const Instruction *I) { case MVT::i32: ZReg = AArch64::WZR; Opc = AArch64::MADDWrrr; + SrcVT = MVT::i32; break; case MVT::i64: ZReg = AArch64::XZR; diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index deb306a..9c33717 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -158,7 +158,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves( MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const AArch64InstrInfo *TII = TM.getInstrInfo(); + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); DebugLoc DL = MBB.findDebugLoc(MBBI); // Add callee saved registers to move list. 
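Worth pausing on the ProcessCallArgs change in the FastISel hunk above: the stack slot size now comes from ArgVT, rounded up to whole bytes, so an i1 or i8 argument still reserves one byte. A self-contained sketch of that arithmetic (the helper name is an invention for illustration):

    #include <cassert>

    static unsigned bytesForBits(unsigned Bits) {
      return (Bits + 7) / 8; // round up to the next whole byte
    }

    int main() {
      assert(bytesForBits(1) == 1);  // i1
      assert(bytesForBits(8) == 1);  // i8
      assert(bytesForBits(16) == 2); // i16
      assert(bytesForBits(64) == 8); // i64
      return 0;
    }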
@@ -204,8 +204,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); - const AArch64RegisterInfo *RegInfo = TM.getRegisterInfo(); - const AArch64InstrInfo *TII = TM.getInstrInfo(); + const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( + MF.getTarget().getRegisterInfo()); + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index 0e00d16..7686e6f 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -18,18 +18,11 @@ namespace llvm { -class AArch64Subtarget; -class AArch64TargetMachine; - class AArch64FrameLowering : public TargetFrameLowering { - const AArch64TargetMachine &TM; - public: - explicit AArch64FrameLowering(const AArch64TargetMachine &TM, - const AArch64Subtarget &STI) + explicit AArch64FrameLowering() : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/), - TM(TM) {} + false /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7007ffc..3f49fab 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -153,9 +153,6 @@ public: SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); - SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node); - SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node); - SDNode *SelectBitfieldExtractOp(SDNode *N); SDNode *SelectBitfieldInsertOp(SDNode *N); @@ -596,8 +593,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, const GlobalValue *GV = GAN->getGlobal(); unsigned Alignment = GV->getAlignment(); const DataLayout *DL = TLI->getDataLayout(); - if (Alignment == 0 && !Subtarget->isTargetDarwin()) - Alignment = DL->getABITypeAlignment(GV->getType()->getElementType()); + Type *Ty = GV->getType()->getElementType(); + if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin()) + Alignment = DL->getABITypeAlignment(Ty); if (Alignment >= Size) return true; @@ -2111,7 +2109,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { .getVectorElementType() .getSizeInBits()) { default: - assert(0 && "Unexpected vector element type!"); + llvm_unreachable("Unexpected vector element type!"); case 64: SubReg = AArch64::dsub; break; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 80d6669..28d0035 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -67,15 +67,15 @@ EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, //===----------------------------------------------------------------------===// // AArch64 Lowering public interface. 
//===----------------------------------------------------------------------===// -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget<AArch64Subtarget>().isTargetDarwin()) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) return new AArch64_MachoTargetObjectFile(); return new AArch64_ELFTargetObjectFile(); } -AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { +AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM) + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { Subtarget = &TM.getSubtarget<AArch64Subtarget>(); // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so @@ -627,7 +627,7 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { unsigned AArch64TargetLowering::getMaximalGlobalOffset() const { // FIXME: On AArch64, this depends on the type. - // Basically, the addressable offsets are o to 4095 * Ty.getSizeInBytes(). + // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. return 4095; } @@ -823,8 +823,7 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, #ifndef NDEBUG MI->dump(); #endif - assert(0 && "Unexpected instruction for custom inserter!"); - break; + llvm_unreachable("Unexpected instruction for custom inserter!"); case AArch64::F128CSEL: return EmitF128CSEL(MI, BB); @@ -833,7 +832,6 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); } - llvm_unreachable("Unexpected instruction for custom inserter!"); } //===----------------------------------------------------------------------===// @@ -1273,7 +1271,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { bool ExtraOp = false; switch (Op.getOpcode()) { default: - assert(0 && "Invalid code"); + llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = AArch64ISD::ADDS; break; @@ -1387,24 +1385,22 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); - // FP_TO_XINT conversion from the same type are legal. - if (VT.getSizeInBits() == InVT.getSizeInBits()) - return Op; - - if (InVT == MVT::v2f64 || InVT == MVT::v4f32) { + if (VT.getSizeInBits() < InVT.getSizeInBits()) { SDLoc dl(Op); SDValue Cv = DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), Op.getOperand(0)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); - } else if (InVT == MVT::v2f32) { + } + + if (VT.getSizeInBits() > InVT.getSizeInBits()) { SDLoc dl(Op); SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0)); return DAG.getNode(Op.getOpcode(), dl, VT, Ext); } // Type changing conversions are illegal. - return SDValue(); + return Op; } SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, @@ -1440,32 +1436,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue In = Op.getOperand(0); EVT InVT = In.getValueType(); - // v2i32 to v2f32 is legal. - if (VT == MVT::v2f32 && InVT == MVT::v2i32) - return Op; - - // This function only handles v2f64 outputs. - if (VT == MVT::v2f64) { - // Extend the input argument to a v2i64 that we can feed into the - // floating point conversion. Zero or sign extend based on whether - // we're doing a signed or unsigned float conversion. 
- unsigned Opc = - Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; - assert(Op.getNumOperands() == 1 && "FP conversions take one argument"); - SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0)); - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted); + if (VT.getSizeInBits() < InVT.getSizeInBits()) { + MVT CastVT = + MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), + InVT.getVectorNumElements()); + In = DAG.getNode(Op.getOpcode(), dl, CastVT, In); + return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0)); } - // Scalarize v2i64 to v2f32 conversions. - std::vector<SDValue> BuildVectorOps; - for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { - SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In, - DAG.getConstant(i, MVT::i64)); - Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr); - BuildVectorOps.push_back(Sclr); + if (VT.getSizeInBits() > InVT.getSizeInBits()) { + unsigned CastOpc = + Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + EVT CastVT = VT.changeVectorElementTypeToInteger(); + In = DAG.getNode(CastOpc, dl, CastVT, In); + return DAG.getNode(Op.getOpcode(), dl, VT, In); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps); + return Op; } SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, @@ -1516,7 +1503,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::Fast, RetTy, Callee, &Args, 0); + .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; @@ -1711,7 +1698,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments( InVals.push_back(FrameIdxN); continue; - } if (VA.isRegLoc()) { + } + + if (VA.isRegLoc()) { // Arguments stored in registers. EVT RegVT = VA.getLocVT(); @@ -1772,10 +1761,16 @@ SDValue AArch64TargetLowering::LowerFormalArguments( SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); SDValue ArgValue; + // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + MVT MemVT = VA.getValVT(); + switch (VA.getLocInfo()) { default: break; + case CCValAssign::BCvt: + MemVT = VA.getLocVT(); + break; case CCValAssign::SExt: ExtType = ISD::SEXTLOAD; break; @@ -1787,10 +1782,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments( break; } - ArgValue = DAG.getExtLoad(ExtType, DL, VA.getValVT(), Chain, FIN, + ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN, MachinePointerInfo::getFixedStack(FI), - VA.getLocVT(), - false, false, false, 0); + MemVT, false, false, false, nullptr); InVals.push_back(ArgValue); } @@ -2339,11 +2333,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already // promoted to a legal register type i32, we should truncate Arg back to // i1/i8/i16. 
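A scalar analogue of the truncation described above, with made-up values: the argument lives in a 32-bit register after promotion, but only the original value type's bits reach the stack slot.

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Promoted = -128;  // an i8 argument after promotion to i32
      uint8_t Stored = static_cast<uint8_t>(Promoted); // truncate back to i8
      assert(Stored == 0x80);   // only the low 8 bits reach the stack slot
      return 0;
    }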
- if (Arg.getValueType().isSimple() && - Arg.getValueType().getSimpleVT() == MVT::i32 && - (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 || - VA.getLocVT() == MVT::i16)) - Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg); + if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 || + VA.getValVT() == MVT::i16) + Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0); @@ -4116,6 +4108,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { // shuffle in combination with VEXTs. SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -4164,35 +4157,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; int VEXTOffsets[2] = { 0, 0 }; + int OffsetMultipliers[2] = { 1, 1 }; // This loop extracts the usage patterns of the source vectors // and prepares appropriate SDValues for a shuffle if possible. for (unsigned i = 0; i < SourceVecs.size(); ++i) { - if (SourceVecs[i].getValueType() == VT) { + unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements(); + SDValue CurSource = SourceVecs[i]; + if (SourceVecs[i].getValueType().getVectorElementType() != + VT.getVectorElementType()) { + // It may hit this case if SourceVecs[i] is AssertSext/AssertZext. + // Then bitcast it to the vector which holds asserted element type, + // and record the multiplier of element width between SourceVecs and + // Build_vector which is needed to extract the correct lanes later. + EVT CastVT = + EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + SourceVecs[i].getValueSizeInBits() / + VT.getVectorElementType().getSizeInBits()); + + CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]); + OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts; + NumSrcElts *= OffsetMultipliers[i]; + MaxElts[i] *= OffsetMultipliers[i]; + MinElts[i] *= OffsetMultipliers[i]; + } + + if (CurSource.getValueType() == VT) { // No VEXT necessary - ShuffleSrcs[i] = SourceVecs[i]; + ShuffleSrcs[i] = CurSource; VEXTOffsets[i] = 0; continue; - } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { + } else if (NumSrcElts < NumElts) { // We can pad out the smaller vector for free, so if it's part of a // shuffle... - ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i], - DAG.getUNDEF(SourceVecs[i].getValueType())); + ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource, + DAG.getUNDEF(CurSource.getValueType())); continue; } - // Don't attempt to extract subvectors from BUILD_VECTOR sources - // that expand or trunc the original value. - // TODO: We can try to bitcast and ANY_EXTEND the result but - // we need to consider the cost of vector ANY_EXTEND, and the - // legality of all the types. - if (SourceVecs[i].getValueType().getVectorElementType() != - VT.getVectorElementType()) - return SDValue(); - // Since only 64-bit and 128-bit vectors are legal on ARM and // we've eliminated the other cases... 
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts && + assert(NumSrcElts == 2 * NumElts && "unexpected vector sizes in ReconstructShuffle"); if (MaxElts[i] - MinElts[i] >= NumElts) { @@ -4203,22 +4208,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, if (MinElts[i] >= NumElts) { // The extraction can just take the second half VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(NumElts)); } else if (MaxElts[i] < NumElts) { // The extraction can just take the first half VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(0)); } else { // An actual VEXT is needed VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - SDValue VEXTSrc2 = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); + SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(0)); + SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(NumElts)); unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Imm, MVT::i32)); @@ -4238,9 +4241,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue(); if (ExtractVec == SourceVecs[0]) { - Mask.push_back(ExtractElt - VEXTOffsets[0]); + Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]); } else { - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); + Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts - + VEXTOffsets[1]); } } @@ -5177,11 +5181,37 @@ FailedModImm: return Op; } +// Normalize the operands of BUILD_VECTOR. The value of constant operands will +// be truncated to fit element width. 
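Before the helper itself, a scalar sketch of that truncation: a constant wider than the element type keeps only its low bits, which is what the APInt construction in the helper achieves.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Cst = 0x1234567890ABCDEFull;       // over-wide vector constant
      uint16_t Lane = static_cast<uint16_t>(Cst); // keep the low 16 bits
      assert(Lane == 0xCDEF);
      return 0;
    }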
+static SDValue NormalizeBuildVector(SDValue Op, + SelectionDAG &DAG) { + assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + EVT EltTy= VT.getVectorElementType(); + + if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16) + return Op; + + SmallVector<SDValue, 16> Ops; + for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) { + SDValue Lane = Op.getOperand(I); + if (Lane.getOpcode() == ISD::Constant) { + APInt LowBits(EltTy.getSizeInBits(), + cast<ConstantSDNode>(Lane)->getZExtValue()); + Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32); + } + Ops.push_back(Lane); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); +} + SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); SDLoc dl(Op); EVT VT = Op.getValueType(); + Op = NormalizeBuildVector(Op, DAG); + BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); APInt CnstBits(VT.getSizeInBits(), 0); APInt UndefBits(VT.getSizeInBits(), 0); @@ -6047,18 +6077,14 @@ bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) + if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } // All 32-bit GPR operations implicitly zero the high-half of the corresponding @@ -6068,18 +6094,14 @@ bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; + return NumBits1 == 32 && NumBits2 == 64; } bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) + if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; + return NumBits1 == 32 && NumBits2 == 64; } bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { @@ -6092,8 +6114,9 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return false; // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. - return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() && - VT2.isInteger() && VT1.getSizeInBits() <= 32); + return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() && + VT2.isSimple() && !VT2.isVector() && VT2.isInteger() && + VT1.getSizeInBits() <= 32); } bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, @@ -6346,23 +6369,45 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { APInt Value = C->getAPIntValue(); EVT VT = N->getValueType(0); - APInt VP1 = Value + 1; - if (VP1.isPowerOf2()) { - // Multiplying by one less than a power of two, replace with a shift - // and a subtract. 
- SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VP1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); - } - APInt VM1 = Value - 1; - if (VM1.isPowerOf2()) { - // Multiplying by one more than a power of two, replace with a shift - // and an add. - SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VM1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); + if (Value.isNonNegative()) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + APInt VM1 = Value - 1; + if (VM1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VM1.logBase2(), MVT::i64)); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, + N->getOperand(0)); + } + // (mul x, 2^N - 1) => (sub (shl x, N), x) + APInt VP1 = Value + 1; + if (VP1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VP1.logBase2(), MVT::i64)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, + N->getOperand(0)); + } + } else { + // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + APInt VNM1 = -Value - 1; + if (VNM1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VNM1.logBase2(), MVT::i64)); + SDValue Add = + DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add); + } + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + APInt VNP1 = -Value + 1; + if (VNP1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VNP1.logBase2(), MVT::i64)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0), + ShiftedVal); + } } } return SDValue(); @@ -6687,7 +6732,7 @@ static SDValue tryCombineFixedPointConvert(SDNode *N, else if (Vec.getValueType() == MVT::v2i64) VecResTy = MVT::v2f64; else - assert(0 && "unexpected vector type!"); + llvm_unreachable("unexpected vector type!"); SDValue Convert = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); @@ -7020,7 +7065,7 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), DAG.getConstant(-ShiftAmount, MVT::i32)); - else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits) + else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), DAG.getConstant(ShiftAmount, MVT::i32)); @@ -7867,6 +7912,18 @@ bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { return Inst->getType()->getPrimitiveSizeInBits() <= 128; } +TargetLoweringBase::LegalizeTypeAction +AArch64TargetLowering::getPreferredVectorAction(EVT VT) const { + MVT SVT = VT.getSimpleVT(); + // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8, + // v4i16, v2i32 instead of to promote. 
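As an aside on the performMulCombine rewrite just above, the four strength-reduction identities it now covers can be sanity-checked in plain C++ with an arbitrary value:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t x = 37;
      assert(x * 9 == (x << 3) + x);     // mul x, 2^3 + 1    -> add(shl, x)
      assert(x * 7 == (x << 3) - x);     // mul x, 2^3 - 1    -> sub(shl, x)
      assert(x * -9 == -((x << 3) + x)); // mul x, -(2^3 + 1) -> neg(add)
      assert(x * -7 == x - (x << 3));    // mul x, -(2^3 - 1) -> sub(x, shl)
      return 0;
    }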
+ if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32 + || SVT == MVT::v1f32) + return TypeWidenVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); +} + Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index de16c4d..cb0b9ef 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -197,7 +197,7 @@ class AArch64TargetLowering : public TargetLowering { bool RequireStrictAlign; public: - explicit AArch64TargetLowering(AArch64TargetMachine &TM); + explicit AArch64TargetLowering(TargetMachine &TM); /// Selects the correct CCAssignFn for a given CallingConvention /// value. @@ -324,6 +324,9 @@ public: bool shouldExpandAtomicInIR(Instruction *Inst) const override; + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const override; + private: /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index d455d7e..5007172 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -448,13 +448,19 @@ def logical_imm64_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(enc, MVT::i32); }]>; -def LogicalImm32Operand : AsmOperandClass { - let Name = "LogicalImm32"; - let DiagnosticType = "LogicalSecondSource"; -} -def LogicalImm64Operand : AsmOperandClass { - let Name = "LogicalImm64"; - let DiagnosticType = "LogicalSecondSource"; +let DiagnosticType = "LogicalSecondSource" in { + def LogicalImm32Operand : AsmOperandClass { + let Name = "LogicalImm32"; + } + def LogicalImm64Operand : AsmOperandClass { + let Name = "LogicalImm64"; + } + def LogicalImm32NotOperand : AsmOperandClass { + let Name = "LogicalImm32Not"; + } + def LogicalImm64NotOperand : AsmOperandClass { + let Name = "LogicalImm64Not"; + } } def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{ return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32); @@ -468,6 +474,12 @@ def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{ let PrintMethod = "printLogicalImm64"; let ParserMatchClass = LogicalImm64Operand; } +def logical_imm32_not : Operand<i32> { + let ParserMatchClass = LogicalImm32NotOperand; +} +def logical_imm64_not : Operand<i64> { + let ParserMatchClass = LogicalImm64NotOperand; +} // imm0_65535 predicate - True if the immediate is in the range [0,65535]. def Imm0_65535Operand : AsmImmRange<0, 65535>; @@ -963,8 +975,14 @@ def ccode : Operand<i32> { let ParserMatchClass = CondCode; } def inv_ccode : Operand<i32> { + // AL and NV are invalid in the aliases which use inv_ccode let PrintMethod = "printInverseCondCode"; let ParserMatchClass = CondCode; + let MCOperandPredicate = [{ + return MCOp.isImm() && + MCOp.getImm() != AArch64CC::AL && + MCOp.getImm() != AArch64CC::NV; + }]; } // Conditional branch target. 19-bit immediate.
The low two bits of the target @@ -1323,13 +1341,13 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype, multiclass MulAccum<bit isSub, string asm, SDNode AccNode> { def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm, [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>, - Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> { + Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { let Inst{31} = 0; } def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm, [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>, - Sched<[WriteIM64, ReadIMA, ReadIM, ReadIM]> { + Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> { let Inst{31} = 1; } } @@ -1339,7 +1357,7 @@ class WideMulAccum<bit isSub, bits<3> opc, string asm, : BaseMulAccum<isSub, opc, GPR32, GPR64, asm, [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>, - Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> { + Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { let Inst{31} = 1; } @@ -1738,6 +1756,10 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> { WZR, GPR32:$src1, GPR32:$src2, 0), 5>; def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrs") XZR, GPR64:$src1, GPR64:$src2, 0), 5>; + def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrx") + WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>; + def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrx64") + XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>; // Register/register aliases with no shift when SP is not used. def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"), @@ -1925,22 +1947,32 @@ class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype> : InstAlias<asm#" $dst, $src1, $src2", (inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>; -let AddedComplexity = 6 in -multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode> { +multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode, + string Alias> { + let AddedComplexity = 6 in def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic, [(set GPR32sp:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> { let Inst{31} = 0; let Inst{22} = 0; // 64-bit version has an additional bit of immediate. 
} + let AddedComplexity = 6 in def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic, [(set GPR64sp:$Rd, (OpNode GPR64:$Rn, logical_imm64:$imm))]> { let Inst{31} = 1; } + + def : InstAlias<Alias # " $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstAlias<Alias # " $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; } -multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> { +multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode, + string Alias> { let isCompare = 1, Defs = [NZCV] in { def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic, [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> { @@ -1952,6 +1984,13 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> { let Inst{31} = 1; } } // end Defs = [NZCV] + + def : InstAlias<Alias # " $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstAlias<Alias # " $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; } class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode> diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index ff115c0..ce85b2c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -35,8 +35,14 @@ AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MCInstrDesc &Desc = MI->getDesc(); + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + if (MI->getOpcode() == AArch64::INLINEASM) + return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); + const MCInstrDesc &Desc = MI->getDesc(); switch (Desc.getOpcode()) { default: // Anything not explicitly designated otherwise is a normal 4-byte insn.
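The GetInstSizeInBytes hunk above special-cases INLINEASM, whose size cannot be read from the opcode table. A rough sketch of how such an estimate can work; the real getInlineAsmLength is more careful (it consults the MCAsmInfo for separator and comment syntax), so treat this as an approximation:

    #include <cassert>

    // Count statements and assume the worst case of one maximum-length
    // instruction (4 bytes on AArch64) per statement.
    static unsigned estimateInlineAsmSize(const char *Str, unsigned MaxInstLen) {
      unsigned Stmts = 0;
      bool InStmt = false;
      for (const char *P = Str; *P; ++P) {
        if (*P == '\n' || *P == ';') { InStmt = false; continue; }
        if (!InStmt) { ++Stmts; InStmt = true; }
      }
      return Stmts * MaxInstLen;
    }

    int main() {
      assert(estimateInlineAsmSize("nop\nnop", 4) == 8);
      assert(estimateInlineAsmSize("", 4) == 0);
      return 0;
    }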
@@ -1224,7 +1230,7 @@ void AArch64InstrInfo::copyPhysRegTuple( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef<unsigned> Indices) const { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); const TargetRegisterInfo *TRI = &getRegisterInfo(); uint16_t DestEncoding = TRI->getEncodingValue(DestReg); @@ -1385,7 +1391,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR128RegClass.contains(DestReg) && AArch64::FPR128RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) .addReg(SrcReg) .addReg(SrcReg, getKillRegState(KillSrc)); @@ -1406,7 +1412,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR64RegClass.contains(DestReg) && AArch64::FPR64RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, @@ -1423,7 +1429,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR32RegClass.contains(DestReg) && AArch64::FPR32RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, @@ -1440,7 +1446,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR16RegClass.contains(DestReg) && AArch64::FPR16RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, @@ -1461,7 +1467,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR8RegClass.contains(DestReg) && AArch64::FPR8RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, @@ -1577,39 +1583,39 @@ void AArch64InstrInfo::storeRegToStackSlot( if (AArch64::FPR128RegClass.hasSubClassEq(RC)) Opc = AArch64::STRQui; else if (AArch64::DDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov1d, Offset = false; } break; case 24: if (AArch64::DDDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev1d, Offset = false; } break; case 32: if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv1d, Offset = false; } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov2d, Offset = false; } break; case 48: if (AArch64::QQQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev2d, Offset = false; } break; case 64: if 
(AArch64::QQQQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv2d, Offset = false; } @@ -1675,39 +1681,39 @@ void AArch64InstrInfo::loadRegFromStackSlot( if (AArch64::FPR128RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRQui; else if (AArch64::DDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov1d, Offset = false; } break; case 24: if (AArch64::DDDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev1d, Offset = false; } break; case 32: if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv1d, Offset = false; } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov2d, Offset = false; } break; case 48: if (AArch64::QQQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev2d, Offset = false; } break; case 64: if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv2d, Offset = false; } @@ -1726,7 +1732,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( void llvm::emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset, - const AArch64InstrInfo *TII, + const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool SetNZCV) { if (DestReg == SrcReg && Offset == 0) return; @@ -1835,7 +1841,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, *OutUnscaledOp = 0; switch (MI.getOpcode()) { default: - assert(0 && "unhandled opcode in rewriteAArch64FrameIndex"); + llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); // Vector spills/fills can't take an immediate offset. case AArch64::LD1Twov2d: case AArch64::LD1Threev2d: diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 90ce75f..f70b82b 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -44,8 +44,6 @@ public: /// always be able to get register info as well (through this method). const AArch64RegisterInfo &getRegisterInfo() const { return RI; } - const AArch64Subtarget &getSubTarget() const { return Subtarget; } - unsigned GetInstSizeInBytes(const MachineInstr *MI) const; bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, @@ -168,7 +166,7 @@ private: /// if necessary, to be replaced by the scavenger at the end of PEI. 
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset, - const AArch64InstrInfo *TII, + const TargetInstrInfo *TII, MachineInstr::MIFlag = MachineInstr::NoFlags, bool SetNZCV = false); diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 9ad36e8..1211fba 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -323,7 +323,7 @@ def : Pat<(AArch64LOADgot tconstpool:$addr), // System instructions. //===----------------------------------------------------------------------===// -def HINT : HintI<"hint">; +def HINT : HintI<"hint">; def : InstAlias<"nop", (HINT 0b000)>; def : InstAlias<"yield",(HINT 0b001)>; def : InstAlias<"wfe", (HINT 0b010)>; @@ -671,10 +671,10 @@ def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; //===----------------------------------------------------------------------===// // (immediate) -defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>; -defm AND : LogicalImm<0b00, "and", and>; -defm EOR : LogicalImm<0b10, "eor", xor>; -defm ORR : LogicalImm<0b01, "orr", or>; +defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; +defm AND : LogicalImm<0b00, "and", and, "bic">; +defm EOR : LogicalImm<0b10, "eor", xor, "eon">; +defm ORR : LogicalImm<0b01, "orr", or, "orn">; // FIXME: these aliases *are* canonical sometimes (when movz can't be // used). Actually, it seems to be working right now, but putting logical_immXX @@ -737,6 +737,10 @@ def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; defm CLS : OneOperandData<0b101, "cls">; defm CLZ : OneOperandData<0b100, "clz", ctlz>; defm RBIT : OneOperandData<0b000, "rbit">; + +def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>; +def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>; + def REV16Wr : OneWRegData<0b001, "rev16", UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; def REV16Xr : OneXRegData<0b001, "rev16", null_frag>; @@ -2238,6 +2242,81 @@ def : Pat<(f32_to_f16 FPR32:$Rn), def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn), [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>; +// When converting from f16 coming directly from a load, make sure we +// load into the FPR16 registers rather than going through the GPRs. 
+// f16->f32 +def : Pat<(f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend))))), + (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; +def : Pat<(f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend))))), + (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; +def : Pat <(f32 (f16_to_f32 (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; +def : Pat <(f32 (f16_to_f32 (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), + (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; + +// f16->f64 +def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend))))))), + (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; +def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend))))))), + (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; +def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))), + (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; +def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))), + (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; + +// When converting to f16 going directly to a store, make sure we use the +// appropriate direct conversion instructions and store via the FPR16 +// registers rather than going through the GPRs. +let AddedComplexity = 10 in { +// f32->f16 +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)), + (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)>; +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), + (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>; +// f64->f16 +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)), + (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)>; +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), + (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>; +} + + //===----------------------------------------------------------------------===// // Floating point single operand instructions. 
//===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index e7454be..3df9c4f 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -40,14 +40,13 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded"); STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); -static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", cl::init(20), - cl::Hidden); +static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", + cl::init(20), cl::Hidden); // Place holder while testing unscaled load/store combining -static cl::opt<bool> -EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden, - cl::desc("Allow AArch64 unscaled load/store combining"), - cl::init(true)); +static cl::opt<bool> EnableAArch64UnscaledMemOp( + "aarch64-unscaled-mem-op", cl::Hidden, + cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true)); namespace { struct AArch64LoadStoreOpt : public MachineFunctionPass { @@ -60,19 +59,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Scan the instructions looking for a load/store that can be combined // with the current instruction into a load/store pair. // Return the matching instruction if one is found, else MBB->end(). - // If a matching instruction is found, mergeForward is set to true if the + // If a matching instruction is found, MergeForward is set to true if the // merge is to remove the first instruction and replace the second with // a pair-wise insn, and false if the reverse is true. MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, + bool &MergeForward, unsigned Limit); // Merge the two instructions indicated into a single pair-wise instruction. - // If mergeForward is true, erase the first instruction and fold its + // If MergeForward is true, erase the first instruction and fold its // operation into the second. If false, the reverse. Return the instruction // following the first instruction (which may change during processing). 
MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, bool mergeForward); + MachineBasicBlock::iterator Paired, bool MergeForward); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using @@ -142,7 +141,7 @@ static bool isUnscaledLdst(unsigned Opc) { int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { switch (MemMI->getOpcode()) { default: - llvm_unreachable("Opcode has has unknown size!"); + llvm_unreachable("Opcode has unknown size!"); case AArch64::STRSui: case AArch64::STURSi: return 4; @@ -217,16 +216,26 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no pre-indexed equivalent!"); - case AArch64::STRSui: return AArch64::STRSpre; - case AArch64::STRDui: return AArch64::STRDpre; - case AArch64::STRQui: return AArch64::STRQpre; - case AArch64::STRWui: return AArch64::STRWpre; - case AArch64::STRXui: return AArch64::STRXpre; - case AArch64::LDRSui: return AArch64::LDRSpre; - case AArch64::LDRDui: return AArch64::LDRDpre; - case AArch64::LDRQui: return AArch64::LDRQpre; - case AArch64::LDRWui: return AArch64::LDRWpre; - case AArch64::LDRXui: return AArch64::LDRXpre; + case AArch64::STRSui: + return AArch64::STRSpre; + case AArch64::STRDui: + return AArch64::STRDpre; + case AArch64::STRQui: + return AArch64::STRQpre; + case AArch64::STRWui: + return AArch64::STRWpre; + case AArch64::STRXui: + return AArch64::STRXpre; + case AArch64::LDRSui: + return AArch64::LDRSpre; + case AArch64::LDRDui: + return AArch64::LDRDpre; + case AArch64::LDRQui: + return AArch64::LDRQpre; + case AArch64::LDRWui: + return AArch64::LDRWpre; + case AArch64::LDRXui: + return AArch64::LDRXpre; } } @@ -260,7 +269,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, - bool mergeForward) { + bool MergeForward) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -276,12 +285,12 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); // Insert our new paired instruction after whichever of the paired - // instructions mergeForward indicates. - MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I; - // Also based on mergeForward is from where we copy the base register operand + // instructions MergeForward indicates. + MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; + // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. MachineOperand &BaseRegOp = - mergeForward ? Paired->getOperand(1) : I->getOperand(1); + MergeForward ? Paired->getOperand(1) : I->getOperand(1); // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI, *Rt2MI; @@ -355,8 +364,8 @@ static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { if (IsUnscaled) { // Convert the byte-offset used by unscaled into an "element" offset used // by the scaled pair load/store instructions. 
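A concrete illustration of that conversion: the paired forms encode a signed 7-bit immediate scaled by the element size, so the byte offset divided by the stride must land in [-64, 63]. A minimal sketch, with an invented helper name (inPairRange is not LLVM code), matching the ElemOffset bounds in the hunk below:

  #include <cstdio>

  // LDP/STP take a signed 7-bit, size-scaled immediate, so an unscaled
  // byte offset is only pairable if offset/stride fits in [-64, 63].
  static bool inPairRange(int ByteOffset, int StrideBytes) {
    int ElemOffset = ByteOffset / StrideBytes;
    return ElemOffset >= -64 && ElemOffset <= 63;
  }

  int main() {
    std::printf("%d\n", inPairRange(252, 4));  // 252/4  = 63  -> 1, encodable
    std::printf("%d\n", inPairRange(256, 4));  // 256/4  = 64  -> 0, out of range
    std::printf("%d\n", inPairRange(-256, 4)); // -256/4 = -64 -> 1, encodable
  }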
- int elemOffset = Offset / OffsetStride; - if (elemOffset > 63 || elemOffset < -64) + int ElemOffset = Offset / OffsetStride; + if (ElemOffset > 63 || ElemOffset < -64) return false; } return true; @@ -374,14 +383,14 @@ static int alignTo(int Num, int PowOf2) { /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, unsigned Limit) { + bool &MergeForward, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; ++MBBI; int Opc = FirstMI->getOpcode(); - bool mayLoad = FirstMI->mayLoad(); + bool MayLoad = FirstMI->mayLoad(); bool IsUnscaled = isUnscaledLdst(Opc); unsigned Reg = FirstMI->getOperand(0).getReg(); unsigned BaseReg = FirstMI->getOperand(1).getReg(); @@ -453,7 +462,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // If the destination register of the loads is the same register, bail // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. - if (mayLoad && Reg == MI->getOperand(0).getReg()) { + if (MayLoad && Reg == MI->getOperand(0).getReg()) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); continue; } @@ -462,7 +471,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // the two instructions, we can combine the second into the first. if (!ModifiedRegs[MI->getOperand(0).getReg()] && !UsedRegs[MI->getOperand(0).getReg()]) { - mergeForward = false; + MergeForward = false; return MBBI; } @@ -471,7 +480,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // second. if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && !UsedRegs[FirstMI->getOperand(0).getReg()]) { - mergeForward = true; + MergeForward = true; return MBBI; } // Unable to combine these instructions due to interference in between. @@ -798,14 +807,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { break; } // Look ahead up to ScanLimit instructions for a pairable instruction. - bool mergeForward = false; + bool MergeForward = false; MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, mergeForward, ScanLimit); + findMatchingInsn(MBBI, MergeForward, ScanLimit); if (Paired != E) { // Merge the loads into a pair. Keeping the iterator straight is a // pain, so we let the merge routine tell us what the next instruction // is after it's done mucking about. 
- MBBI = mergePairedInsns(MBBI, Paired, mergeForward); + MBBI = mergePairedInsns(MBBI, Paired, MergeForward); Modified = true; ++NumPairCreated; diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index ab6d375..75a17b9 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -51,7 +51,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; else - assert(0 && "Unexpected target flags with MO_GOT on GV operand"); + llvm_unreachable("Unexpected target flags with MO_GOT on GV operand"); } else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) { if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_TLVPPAGE; @@ -154,7 +154,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { switch (MO.getType()) { default: - assert(0 && "unknown operand type"); + llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all implicit register operands. if (MO.isImplicit()) diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index 21c927f..a30e4ad 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -175,7 +175,7 @@ def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>; // This is for indirect tail calls to store the address of the destination. def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21, X22, X23, X24, X25, X26, - X27, X28)>; + X27, X28, FP, LR)>; // GPR register classes for post increment amount of vector load/store that // has alternate printing when Rm=31 and prints a constant immediate value diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td index 0c3949e..d709bee 100644 --- a/lib/Target/AArch64/AArch64SchedA53.td +++ b/lib/Target/AArch64/AArch64SchedA53.td @@ -148,9 +148,9 @@ def : ReadAdvance<ReadVLD, 0>; // ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable // operands are needed one cycle later if and only if they are to be -// shifted. Otherwise, they too are needed two cycle later. This same +// shifted. Otherwise, they too are needed two cycles later. This same // ReadAdvance applies to Extended registers as well, even though there is -// a seperate SchedPredicate for them. +// a separate SchedPredicate for them. def : ReadAdvance<ReadI, 2, [WriteImm,WriteI, WriteISReg, WriteIEReg,WriteIS, WriteID32,WriteID64, diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td new file mode 100644 index 0000000..8209f96 --- /dev/null +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -0,0 +1,304 @@ +//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for ARM Cortex-A57 to support +// instruction scheduling and other instruction cost heuristics. 
+// +//===----------------------------------------------------------------------===// + +def CortexA57Model : SchedMachineModel { + let IssueWidth = 8; // 3-way decode and 8-way issue + let MicroOpBufferSize = 128; // 128 micro-op re-order buffer + let LoadLatency = 4; // Optimistic load latency + let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cortex-A57. +// Cortex-A57 has 8 pipelines, each with its own 8-entry queue where +// micro-ops wait for their operands and then issue out-of-order. + +def A57UnitB : ProcResource<1> { let BufferSize = 8; } // Type B micro-ops +def A57UnitI : ProcResource<2> { let BufferSize = 8; } // Type I micro-ops +def A57UnitM : ProcResource<1> { let BufferSize = 8; } // Type M micro-ops +def A57UnitL : ProcResource<1> { let BufferSize = 8; } // Type L micro-ops +def A57UnitS : ProcResource<1> { let BufferSize = 8; } // Type S micro-ops +def A57UnitX : ProcResource<1> { let BufferSize = 8; } // Type X micro-ops +def A57UnitW : ProcResource<1> { let BufferSize = 8; } // Type W micro-ops +let SchedModel = CortexA57Model in { + def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops +} + + +let SchedModel = CortexA57Model in { + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Cortex-A57. + +include "AArch64SchedA57WriteRes.td" + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latency for +// Cortex-A57. The Cortex-A57 types are directly associated with resources, so +// defining the aliases precludes the need for mapping them using WriteRes. The +// aliases are sufficient for creating a coarse, working model. As the model +// evolves, InstRWs will be used to override these SchedAliases.
+ +def : SchedAlias<WriteImm, A57Write_1cyc_1I>; +def : SchedAlias<WriteI, A57Write_1cyc_1I>; +def : SchedAlias<WriteISReg, A57Write_2cyc_1M>; +def : SchedAlias<WriteIEReg, A57Write_2cyc_1M>; +def : SchedAlias<WriteExtr, A57Write_1cyc_1I>; +def : SchedAlias<WriteIS, A57Write_1cyc_1I>; +def : SchedAlias<WriteID32, A57Write_19cyc_1M>; +def : SchedAlias<WriteID64, A57Write_35cyc_1M>; +def : SchedAlias<WriteIM32, A57Write_3cyc_1M>; +def : SchedAlias<WriteIM64, A57Write_5cyc_1M>; +def : SchedAlias<WriteBr, A57Write_1cyc_1B>; +def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>; +def : SchedAlias<WriteLD, A57Write_4cyc_1L>; +def : SchedAlias<WriteST, A57Write_1cyc_1S>; +def : SchedAlias<WriteSTP, A57Write_1cyc_1S>; +def : SchedAlias<WriteAdr, A57Write_1cyc_1I>; +def : SchedAlias<WriteLDIdx, A57Write_4cyc_1I_1L>; +def : SchedAlias<WriteSTIdx, A57Write_1cyc_1I_1S>; +def : SchedAlias<WriteF, A57Write_3cyc_1V>; +def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>; +def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>; +def : SchedAlias<WriteFCopy, A57Write_3cyc_1V>; +def : SchedAlias<WriteFImm, A57Write_3cyc_1V>; +def : SchedAlias<WriteFMul, A57Write_5cyc_1V>; +def : SchedAlias<WriteFDiv, A57Write_18cyc_1X>; +def : SchedAlias<WriteV, A57Write_3cyc_1V>; +def : SchedAlias<WriteVLD, A57Write_5cyc_1L>; +def : SchedAlias<WriteVST, A57Write_1cyc_1S>; + +def : WriteRes<WriteSys, []> { let Latency = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } + +def : WriteRes<WriteLDHi, []> { let Latency = 4; } + +// Forwarding logic is not [yet] explicitly modeled beyond what is captured +// in the latencies of the A57 Generic SchedWriteRes types. +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + + +//===----------------------------------------------------------------------===// +// Specialize the coarse model by associating instruction groups with the +// subtarget-defined types. As the model is refined, this will override most +// of the above SchedAlias mappings.
+ +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[WriteI], (instrs COPY)>; + + +// Branch Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>; +def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>; + + +// Divide and Multiply Instructions +// ----------------------------------------------------------------------------- + +// Multiply high +def : InstRW<[A57Write_6cyc_1M], (instrs SMULHrr, UMULHrr)>; + + +// Miscellaneous Data-Processing Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1I], (instrs EXTRWrri)>; +def : InstRW<[A57Write_3cyc_1I_1M], (instrs EXTRXrri)>; +def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>; + + +// Cryptography Extensions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_3cyc_1W], (instregex "CRC32")>; + + +// Vector Load +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>; +def : 
InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>; +def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>; +def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>; +def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4i(32)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>; +def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_2V], 
(instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +// Vector Store +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>; +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>; + +def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>; + +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>; +def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST2Twov(2d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>; +def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>; +def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>; +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>; + +def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>; +def : 
InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>; +def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>; +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST4i(64)_POST$")>; + +def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +} // SchedModel = CortexA57Model diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td new file mode 100644 index 0000000..a8f421b --- /dev/null +++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td @@ -0,0 +1,512 @@ +//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and micro-op count. The naming convention is to use a prefix, one +// field for latency, and one or more micro-op count/type designators. +// Prefix: A57Write +// Latency: #cyc +// MicroOp Count/Types: #(B|I|M|L|S|X|W|V) +// +// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are +// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define Generic 1 micro-op types + +def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; } +def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } +def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } +def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; } +def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; } +def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } +def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } +def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } +def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } +def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; } +def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; } +def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; } +def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; } +def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; } +def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; } +def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; } +def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } + + +//===----------------------------------------------------------------------===// +// Define Generic 2 micro-op types + +def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 64; + let NumMicroOps = 2; +} +def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 7; + let NumMicroOps = 2; +} +def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def 
A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 36; + let NumMicroOps = 2; +} +def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 4; + let NumMicroOps = 2; +} +def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 4; + let NumMicroOps = 2; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 3 micro-op types + +def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 3; +} +def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 3; +} +def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_1M_2S : SchedWriteRes<[A57UnitM, + A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_3S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_2S_1V : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_5cyc_1I_2L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 5; + let NumMicroOps = 3; +} +def A57Write_6cyc_1I_2L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 3; +} +def A57Write_6cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 3; +} +def A57Write_7cyc_3L : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL]> { + let Latency = 7; + let NumMicroOps = 3; +} +def A57Write_8cyc_1I_1L_1V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_8cyc_1L_2V : SchedWriteRes<[A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_8cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_9cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 3; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 4 micro-op types + +def A57Write_2cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 4; +} +def A57Write_3cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 4; +} +def A57Write_3cyc_1I_3S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 4; +} +def A57Write_3cyc_1I_2S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitV]> { + let Latency = 3; + let 
NumMicroOps = 4; +} +def A57Write_4cyc_4S : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 4; +} +def A57Write_7cyc_1I_3L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, A57UnitL]> { + let Latency = 7; + let NumMicroOps = 4; +} +def A57Write_5cyc_2I_2L : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 5; + let NumMicroOps = 4; +} +def A57Write_8cyc_1I_1L_2V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 4; +} +def A57Write_8cyc_4L : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitL, A57UnitL]> { + let Latency = 8; + let NumMicroOps = 4; +} +def A57Write_9cyc_2L_2V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 4; +} +def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 4; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 5 micro-op types + +def A57Write_3cyc_3S_2V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 5; +} +def A57Write_8cyc_1I_4L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitL, A57UnitL]> { + let Latency = 8; + let NumMicroOps = 5; +} +def A57Write_4cyc_1I_4S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 5; +} +def A57Write_9cyc_1I_2L_2V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_1I_1L_3V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 6 micro-op types + +def A57Write_3cyc_1I_3S_2V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 6; +} +def A57Write_4cyc_2I_4S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 6; +} +def A57Write_4cyc_4S_2V : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 4; + let NumMicroOps = 6; +} +def A57Write_6cyc_6S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 6; + let NumMicroOps = 6; +} +def A57Write_9cyc_1I_2L_3V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} +def A57Write_9cyc_1I_1L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} +def A57Write_9cyc_2L_4V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 7 micro-op types + +def A57Write_10cyc_3L_4V : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 7; +} +def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { 
+ let Latency = 4; + let NumMicroOps = 7; +} +def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 6; + let NumMicroOps = 7; +} +def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 7; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 8 micro-op types + +def A57Write_10cyc_1I_3L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 8; +} +def A57Write_11cyc_4L_4V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 11; + let NumMicroOps = 8; +} +def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 8; + let NumMicroOps = 8; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 9 micro-op types + +def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 8; + let NumMicroOps = 9; +} +def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 11; + let NumMicroOps = 9; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 10 micro-op types + +def A57Write_6cyc_6S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 10; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 11 micro-op types + +def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 11; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 12 micro-op types + +def A57Write_8cyc_8S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 12; +} + +//===----------------------------------------------------------------------===// +// Define Generic 13 micro-op types + +def A57Write_8cyc_1I_8S_4V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 13; +} + diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 5c65b75..1bf64fc 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -16,9 +16,8 @@ using namespace llvm; #define DEBUG_TYPE "aarch64-selectiondag-info" -AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {} +AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const DataLayout *DL) + : TargetSelectionDAGInfo(DL) {} AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {} @@ 
-30,7 +29,9 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size); const char *bzeroEntry = - (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : nullptr; + (V && V->isNullValue()) + ? DAG.getTarget().getSubtarget<AArch64Subtarget>().getBZeroEntry() + : nullptr; // For small size (< 256), it is not beneficial to use bzero // instead of memset. if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { @@ -50,7 +51,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0) + DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args), 0) .setDiscardResult(); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h index 8381f99..1180eea 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -19,12 +19,8 @@ namespace llvm { class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can - /// make the right decision when generating code for different targets. - const AArch64Subtarget *Subtarget; - public: - explicit AArch64SelectionDAGInfo(const TargetMachine &TM); + explicit AArch64SelectionDAGInfo(const DataLayout *DL); ~AArch64SelectionDAGInfo(); SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index cd69994..bb0b72c 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -30,21 +30,35 @@ static cl::opt<bool> EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden); -AArch64Subtarget::AArch64Subtarget(const std::string &TT, - const std::string &CPU, - const std::string &FS, bool LittleEndian) - : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU), - TargetTriple(TT), IsLittleEndian(LittleEndian) { +AArch64Subtarget & +AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) { // Determine default and user-specified characteristics if (CPUString.empty()) CPUString = "generic"; ParseSubtargetFeatures(CPUString, FS); + return *this; } +AArch64Subtarget::AArch64Subtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS, TargetMachine &TM, + bool LittleEndian) + : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), + HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU), + TargetTriple(TT), + // This nested ternary is horrible, but DL needs to be properly + // initialized + // before TLInfo is constructed. + DL(isTargetMachO() + ? "e-m:o-i64:64-i128:128-n32:64-S128" + : (LittleEndian ? 
"e-m:e-i64:64-i128:128-n32:64-S128" + : "E-m:e-i64:64-i128:128-n32:64-S128")), + FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), + TSInfo(&DL), TLInfo(TM) {} + /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. unsigned char diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 590ea05..52124f6 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -14,8 +14,13 @@ #ifndef AArch64SUBTARGET_H #define AArch64SUBTARGET_H -#include "llvm/Target/TargetSubtargetInfo.h" +#include "AArch64InstrInfo.h" +#include "AArch64FrameLowering.h" +#include "AArch64ISelLowering.h" #include "AArch64RegisterInfo.h" +#include "AArch64SelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <string> #define GET_SUBTARGETINFO_HEADER @@ -49,15 +54,32 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; - /// IsLittleEndian - Is the target little endian? - bool IsLittleEndian; + const DataLayout DL; + AArch64FrameLowering FrameLowering; + AArch64InstrInfo InstrInfo; + AArch64SelectionDAGInfo TSInfo; + AArch64TargetLowering TLInfo; +private: + /// initializeSubtargetDependencies - Initializes using CPUString and the + /// passed in feature string so that we can use initializer lists for + /// subtarget initialization. + AArch64Subtarget &initializeSubtargetDependencies(StringRef FS); public: /// This constructor initializes the data members to match that /// of the specified triple. AArch64Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool LittleEndian); - + const std::string &FS, TargetMachine &TM, bool LittleEndian); + + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + const AArch64FrameLowering *getFrameLowering() const { + return &FrameLowering; + } + const AArch64TargetLowering *getTargetLowering() const { + return &TLInfo; + } + const AArch64InstrInfo *getInstrInfo() const { return &InstrInfo; } + const DataLayout *getDataLayout() const { return &DL; } bool enableMachineScheduler() const override { return true; } bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } @@ -69,7 +91,7 @@ public: bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } - bool isLittleEndian() const { return IsLittleEndian; } + bool isLittleEndian() const { return DL.isLittleEndian(); } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 0b5dd2f..f99b90b 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -53,6 +53,12 @@ static cl::opt<bool> EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden); +static cl::opt<bool> +EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden, + cl::desc("Run SimplifyCFG after expanding atomic operations" + " to make use of cmpxchg flow-based information"), + cl::init(true)); + extern "C" void LLVMInitializeAArch64Target() { // Register the target. 
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget); @@ -71,16 +77,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool LittleEndian) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, LittleEndian), - // This nested ternary is horrible, but DL needs to be properly - // initialized - // before TLInfo is constructed. - DL(Subtarget.isTargetMachO() - ? "e-m:o-i64:64-i128:128-n32:64-S128" - : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128" - : "E-m:e-i64:64-i128:128-n32:64-S128")), - InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget), - TSInfo(*this) { + Subtarget(TT, CPU, FS, *this, LittleEndian) { initAsmInfo(); } @@ -113,6 +110,7 @@ public: return getTM<AArch64TargetMachine>(); } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; bool addILPOpts() override; @@ -135,6 +133,20 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { return new AArch64PassConfig(this, PM); } +void AArch64PassConfig::addIRPasses() { + // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg + // ourselves. + addPass(createAtomicExpandLoadLinkedPass(TM)); + + // Cmpxchg instructions are often used with a subsequent comparison to + // determine whether it succeeded. We can exploit existing control-flow in + // ldrex/strex loops to simplify this, but it needs tidying up. + if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) + addPass(createCFGSimplificationPass()); + + TargetPassConfig::addIRPasses(); +} + // Pass Pipeline Configuration bool AArch64PassConfig::addPreISel() { // Run promote constant before global merge, so that the promoted constants @@ -146,10 +158,6 @@ bool AArch64PassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); - // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg - // ourselves. 
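To make the motivation for that extra SimplifyCFG run concrete: a compare-exchange in source code almost always feeds a branch on its success flag, and the expanded load-linked/store-conditional loop already contains matching success/failure control flow for that branch to be folded into. A hedged C++ sketch of the pattern, illustrative only (lockOrSpin is an invented example, not LLVM code):

  #include <atomic>

  // A cmpxchg whose success bit immediately controls a branch; after
  // atomic expansion, the ldrex/strex-style loop already has success and
  // failure edges that SimplifyCFG can fold this branch into.
  void lockOrSpin(std::atomic<int> &Lock) {
    int Expected = 0;
    while (!Lock.compare_exchange_weak(Expected, 1)) // cmpxchg + compare
      Expected = 0; // failure edge: reset and retry
    // success edge: lock acquired
  }

  int main() {
    std::atomic<int> Lock{0};
    lockOrSpin(Lock); // first iteration succeeds: 0 -> 1
  }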
- addPass(createAtomicExpandLoadLinkedPass(TM)); - return false; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h index 079b19b..852cb3f 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -15,13 +15,9 @@ #define AArch64TARGETMACHINE_H #include "AArch64InstrInfo.h" -#include "AArch64ISelLowering.h" #include "AArch64Subtarget.h" -#include "AArch64FrameLowering.h" -#include "AArch64SelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCStreamer.h" namespace llvm { @@ -29,13 +25,6 @@ class AArch64TargetMachine : public LLVMTargetMachine { protected: AArch64Subtarget Subtarget; -private: - const DataLayout DL; - AArch64InstrInfo InstrInfo; - AArch64TargetLowering TLInfo; - AArch64FrameLowering FrameLowering; - AArch64SelectionDAGInfo TSInfo; - public: AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -46,18 +35,22 @@ public: return &Subtarget; } const AArch64TargetLowering *getTargetLowering() const override { - return &TLInfo; + return getSubtargetImpl()->getTargetLowering(); + } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); } - const DataLayout *getDataLayout() const override { return &DL; } const AArch64FrameLowering *getFrameLowering() const override { - return &FrameLowering; + return getSubtargetImpl()->getFrameLowering(); + } + const AArch64InstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); } - const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } const AArch64RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); + return &getInstrInfo()->getRegisterInfo(); } const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; + return getSubtargetImpl()->getSelectionDAGInfo(); } // Pass Pipeline Configuration diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 33e482a..1dac14b 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -306,28 +306,64 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = { // LowerVectorINT_TO_FP: { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, + + // Complex: to v2f32 + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, + + // Complex: to v4f32 + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, 
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + + // Complex: to v2f64 + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + // LowerVectorFP_TO_INT + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 }, - { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 }, - { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 }, + + // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext). + { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 }, + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 }, + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 }, + { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 }, + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 }, + + // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2 + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 }, + + // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2. + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 }, }; int Idx = ConvertCostTableLookup<MVT>( diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 65b77c5..c42d11e 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -38,14 +38,19 @@ namespace { class AArch64Operand; class AArch64AsmParser : public MCTargetAsmParser { -public: - typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector; - private: StringRef Mnemonic; ///< Instruction mnemonic. MCSubtargetInfo &STI; MCAsmParser &Parser; + + // Map of register aliases defined via the .req directive.
+ StringMap<std::pair<bool, unsigned> > RegisterReqs; + + AArch64TargetStreamer &getTargetStreamer() { + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast<AArch64TargetStreamer &>(TS); + } + MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } @@ -54,6 +59,7 @@ private: bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); AArch64CC::CondCode parseCondCodeString(StringRef Cond); bool parseCondCode(OperandVector &Operands, bool invertCondCode); + unsigned matchRegisterNameAlias(StringRef Name, bool isVector); int tryParseRegister(); int tryMatchVectorRegister(StringRef &Kind, bool expected); bool parseRegister(OperandVector &Operands); @@ -70,6 +76,10 @@ private: bool parseDirectiveTLSDescCall(SMLoc L); bool parseDirectiveLOH(StringRef LOH, SMLoc L); + bool parseDirectiveLtorg(SMLoc L); + + bool parseDirectiveReq(StringRef Name, SMLoc L); + bool parseDirectiveUnreq(SMLoc L); bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -108,6 +118,8 @@ public: const MCTargetOptions &Options) : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); + if (Parser.getStreamer().getTargetStreamer() == nullptr) + new AArch64TargetStreamer(Parser.getStreamer()); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -117,7 +129,7 @@ public: SMLoc NameLoc, OperandVector &Operands) override; bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool ParseDirective(AsmToken DirectiveID) override; - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; static bool classifySymbolRef(const MCExpr *Expr, @@ -240,10 +252,10 @@ private: // the add<>Operands() calls. 
MCContext &Ctx;
+public:
AArch64Operand(KindTy K, MCContext &_Ctx)
: MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
-public:
AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
Kind = o.Kind;
StartLoc = o.StartLoc;
@@ -607,7 +619,11 @@ public:
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
return false;
- return AArch64_AM::isLogicalImmediate(MCE->getValue(), 32);
+ int64_t Val = MCE->getValue();
+ if (Val >> 32 != 0 && Val >> 32 != ~0LL)
+ return false;
+ Val &= 0xFFFFFFFF;
+ return AArch64_AM::isLogicalImmediate(Val, 32);
}
bool isLogicalImm64() const {
if (!isImm())
return false;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
return false;
return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64);
}
+ bool isLogicalImm32Not() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
+ return AArch64_AM::isLogicalImmediate(Val, 32);
+ }
+ bool isLogicalImm64Not() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64);
+ }
bool isShiftedImm() const { return Kind == k_ShiftedImm; }
bool isAddSubImm() const {
if (!isShiftedImm() && !isImm())
@@ -1348,7 +1381,8 @@ public:
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
assert(MCE && "Invalid logical immediate operand!");
- uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 32);
+ uint64_t encoding =
+ AArch64_AM::encodeLogicalImmediate(MCE->getValue() & 0xFFFFFFFF, 32);
Inst.addOperand(MCOperand::CreateImm(encoding));
}
@@ -1360,6 +1394,22 @@ public:
Inst.addOperand(MCOperand::CreateImm(encoding));
}
+ void addLogicalImm32NotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
+ uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, 32);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
+
+ void addLogicalImm64NotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ uint64_t encoding =
+ AArch64_AM::encodeLogicalImmediate(~MCE->getValue(), 64);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
+
void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
@@ -1523,9 +1573,9 @@ public:
void print(raw_ostream &OS) const override;
- static AArch64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Token, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Token, Ctx);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->Tok.IsSuffix = IsSuffix;
@@ -1534,9 +1584,9 @@ public:
return Op;
}
- static AArch64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Register, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Register, Ctx);
Op->Reg.RegNum = RegNum;
Op->Reg.isVector = isVector;
Op->StartLoc = S;
@@ -1544,10 +1594,10 @@ public:
return Op;
}
- static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
- unsigned NumElements, char ElementKind,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_VectorList, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements,
+ char ElementKind, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_VectorList, Ctx);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.NumElements = NumElements;
@@ -1557,28 +1607,29 @@ public:
return Op;
}
- static AArch64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_VectorIndex, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_VectorIndex, Ctx);
Op->VectorIndex.Val = Idx;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Immediate, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Immediate, Ctx);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateShiftedImm(const MCExpr *Val,
- unsigned ShiftAmount, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_ShiftedImm, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateShiftedImm(const MCExpr *Val,
+ unsigned ShiftAmount,
+ SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_ShiftedImm, Ctx);
Op->ShiftedImm.Val = Val;
Op->ShiftedImm.ShiftAmount = ShiftAmount;
Op->StartLoc = S;
@@ -1586,34 +1637,36 @@ public:
return Op;
}
- static AArch64Operand *CreateCondCode(AArch64CC::CondCode Code, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_CondCode, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateCondCode(AArch64CC::CondCode Code, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_CondCode, Ctx);
Op->CondCode.Code = Code;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_FPImm, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateFPImm(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_FPImm, Ctx);
Op->FPImm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Barrier, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S,
- uint64_t FeatureBits, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_SysReg, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
Op->SysReg.FeatureBits = FeatureBits;
@@ -1622,27 +1675,28 @@ public:
return Op;
}
- static AArch64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_SysCR, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateSysCR(unsigned Val, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_SysCR, Ctx);
Op->SysCRImm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Prefetch, Ctx);
+ static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp,
- unsigned Val, bool HasExplicitAmount,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
+ bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_ShiftExtend, Ctx);
Op->ShiftExtend.Type = ShOp;
Op->ShiftExtend.Amount = Val;
Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount;
@@ -1816,6 +1870,26 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return (RegNo == (unsigned)-1);
}
+// Matches a register name or register alias previously defined by '.req'.
+unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
+ bool isVector) {
+ unsigned RegNum = isVector ? matchVectorRegName(Name)
+ : MatchRegisterName(Name);
+
+ if (RegNum == 0) {
+ // Check for aliases registered via .req. Canonicalize to lower case.
+ // That's more consistent since register names are case insensitive, and
+ // it's how the original entry was passed in from MC/MCParser/AsmParser.
+ auto Entry = RegisterReqs.find(Name.lower());
+ if (Entry == RegisterReqs.end())
+ return 0;
+ // Set RegNum only if the match is the right kind of register.
+ if (isVector == Entry->getValue().first)
+ RegNum = Entry->getValue().second;
+ }
+ return RegNum;
+}
+
/// tryParseRegister - Try to parse a register name. The token must be an
/// Identifier when called, and if it is a register name the token is eaten and
/// the register is added to the operand list.
@@ -1824,7 +1898,7 @@ int AArch64AsmParser::tryParseRegister() {
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
std::string lowerCase = Tok.getString().lower();
- unsigned RegNum = MatchRegisterName(lowerCase);
+ unsigned RegNum = matchRegisterNameAlias(lowerCase, false);
// Also handle a few aliases of registers.
if (RegNum == 0)
RegNum = StringSwitch<unsigned>(lowerCase)
@@ -1854,7 +1928,8 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
// a '.'.
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- unsigned RegNum = matchVectorRegName(Head);
+ unsigned RegNum = matchRegisterNameAlias(Head, true);
+
if (RegNum) {
if (Next != StringRef::npos) {
Kind = Name.slice(Next, StringRef::npos);
@@ -2183,8 +2258,11 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
return TokError("invalid condition code");
Parser.Lex(); // Eat identifier token.
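A note on matchRegisterNameAlias above: it falls back to the RegisterReqs table that parseDirectiveReq (further down) populates, so an alias defined with .req is accepted wherever the underlying register would be. A minimal sketch of the syntax this enables, with an illustrative alias name:

    count .req w9 // alias 'count' for the 32-bit register w9
    add w0, w0, count // parsed exactly as: add w0, w0, w9
    .unreq count // 'count' becomes an ordinary identifier again

The bool half of the map's value records whether the alias names a vector register, which is why the lookup only honours an entry whose kind matches the requested one.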
- if (invertCondCode)
+ if (invertCondCode) {
+ if (CC == AArch64CC::AL || CC == AArch64CC::NV)
+ return TokError("condition codes AL and NV are invalid for this instruction");
CC = AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC));
+ }
Operands.push_back(
AArch64Operand::CreateCondCode(CC, S, getLoc(), getContext()));
@@ -2849,7 +2927,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
- unsigned RegNum = MatchRegisterName(Tok.getString().lower());
+ unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), false);
MCContext &Ctx = getContext();
const MCRegisterInfo *RI = Ctx.getRegisterInfo();
@@ -3000,6 +3078,43 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E, getContext()));
return false;
}
+ case AsmToken::Equal: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr x0, =val)
+ return Error(Loc, "unexpected token in operand");
+ Parser.Lex(); // Eat '='
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return true;
+
+ MCContext& Ctx = getContext();
+ E = SMLoc::getFromPointer(Loc.getPointer() - 1);
+ // If the op is an imm and can fit into a mov, then replace ldr with mov.
+ if (isa<MCConstantExpr>(SubExprVal) && Operands.size() >= 2 &&
+ static_cast<AArch64Operand &>(*Operands[1]).isReg()) {
+ bool IsXReg = AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Operands[1]->getReg());
+ uint64_t Imm = (cast<MCConstantExpr>(SubExprVal))->getValue();
+ uint32_t ShiftAmt = 0, MaxShiftAmt = IsXReg ? 48 : 16;
+ while (Imm > 0xFFFF && countTrailingZeros(Imm) >= 16) {
+ ShiftAmt += 16;
+ Imm >>= 16;
+ }
+ if (ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF) {
+ Operands[0] = AArch64Operand::CreateToken("movz", false, Loc, Ctx);
+ Operands.push_back(AArch64Operand::CreateImm(
+ MCConstantExpr::Create(Imm, Ctx), S, E, Ctx));
+ if (ShiftAmt)
+ Operands.push_back(AArch64Operand::CreateShiftExtend(AArch64_AM::LSL,
+ ShiftAmt, true, S, E, Ctx));
+ return false;
+ }
+ }
+ // If it is a label or an imm that cannot fit in a movz, put it into the
+ // constant pool.
+ const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal);
+ Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx));
+ return false;
+ }
}
}
@@ -3029,6 +3144,15 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
.Case("bnv", "b.nv")
.Default(Name);
+ // First check for the AArch64-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the instruction.
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
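The AsmToken::Equal case above implements the ldr pseudo-instruction: a constant whose significant bits fit in a single 16-bit chunk shifted by a multiple of 16 is rewritten to a movz, and anything else is deferred to the constant pool. Roughly, with illustrative values and symbol:

    ldr x0, =0x20000 // one non-zero 16-bit chunk: becomes movz x0, #0x2, lsl #16
    ldr x1, =0x123456789 // several non-zero chunks: placed in the literal pool
    ldr x2, =some_symbol // symbolic value: also placed in the literal pool
    .ltorg // the new directive flushes the accumulated pool here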
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
@@ -3443,8 +3567,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
case Match_MnemonicFail:
return Error(Loc, "unrecognized instruction mnemonic");
default:
- assert(0 && "unexpected error code!");
- return Error(Loc, "invalid instruction format");
+ llvm_unreachable("unexpected error code!");
}
}
@@ -3456,23 +3579,23 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
unsigned &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpected empty operand list!");
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[0]);
- assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
- StringRef Tok = Op->getToken();
+ StringRef Tok = Op.getToken();
unsigned NumOperands = Operands.size();
if (NumOperands == 4 && Tok == "lsl") {
- AArch64Operand *Op2 = static_cast<AArch64Operand *>(Operands[2]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- if (Op2->isReg() && Op3->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+ AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ if (Op2.isReg() && Op3.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
if (Op3CE) {
uint64_t Op3Val = Op3CE->getValue();
uint64_t NewOp3Val = 0;
uint64_t NewOp4Val = 0;
if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op2->getReg())) {
+ Op2.getReg())) {
NewOp3Val = (32 - Op3Val) & 0x1f;
NewOp4Val = 31 - Op3Val;
} else {
@@ -3484,26 +3607,24 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext());
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
- Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
- Op3->getEndLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
Operands.push_back(AArch64Operand::CreateImm(
- NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext()));
- delete Op3;
- delete Op;
+ NewOp4, Op3.getStartLoc(), Op3.getEndLoc(), getContext()));
+ Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3.getStartLoc(),
+ Op3.getEndLoc(), getContext());
}
}
} else if (NumOperands == 5) {
// FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
// UBFIZ -> UBFM aliases.
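The NumOperands == 4 block above canonicalizes the lsl-immediate alias into ubfm, computing (RegWidth - shift) mod RegWidth and RegWidth - 1 - shift for the two bitfield immediates. For a 32-bit register, for example:

    lsl w0, w1, #3 // rewritten to: ubfm w0, w1, #29, #28

The bfi/sbfiz/ubfiz handling that follows applies the same style of rewrite, sketched after that hunk.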
if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") {
- AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+ if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
if (Op3CE && Op4CE) {
uint64_t Op3Val = Op3CE->getValue();
@@ -3511,21 +3632,21 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t RegWidth = 0;
if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
RegWidth = 64;
else
RegWidth = 32;
if (Op3Val >= RegWidth)
- return Error(Op3->getStartLoc(),
+ return Error(Op3.getStartLoc(),
"expected integer in range [0, 31]");
if (Op4Val < 1 || Op4Val > RegWidth)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"expected integer in range [1, 32]");
uint64_t NewOp3Val = 0;
if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
NewOp3Val = (32 - Op3Val) & 0x1f;
else
NewOp3Val = (64 - Op3Val) & 0x3f;
@@ -3533,7 +3654,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t NewOp4Val = Op4Val - 1;
if (NewOp3Val != 0 && NewOp4Val >= NewOp3Val)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"requested insert overflows register");
const MCExpr *NewOp3 =
@@ -3541,24 +3662,20 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext());
Operands[3] = AArch64Operand::CreateImm(
- NewOp3, Op3->getStartLoc(), Op3->getEndLoc(), getContext());
+ NewOp3, Op3.getStartLoc(), Op3.getEndLoc(), getContext());
Operands[4] = AArch64Operand::CreateImm(
- NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext());
if (Tok == "bfi")
Operands[0] = AArch64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
+ "bfm", false, Op.getStartLoc(), getContext());
else if (Tok == "sbfiz")
Operands[0] = AArch64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
+ "sbfm", false, Op.getStartLoc(), getContext());
else if (Tok == "ubfiz")
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
else
llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op3;
- delete Op4;
}
}
@@ -3566,13 +3683,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// UBFX -> UBFM aliases.
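Continuing the bitfield-insert rewrites above: bfi Rd, Rn, #lsb, #width becomes bfm with immr = (RegWidth - lsb) mod RegWidth and imms = width - 1 once the range checks pass. For instance:

    bfi w0, w1, #4, #8 // rewritten to: bfm w0, w1, #28, #7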
} else if (NumOperands == 5 &&
(Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) {
- AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+ if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
if (Op3CE && Op4CE) {
uint64_t Op3Val = Op3CE->getValue();
@@ -3580,42 +3697,39 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t RegWidth = 0;
if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
RegWidth = 64;
else
RegWidth = 32;
if (Op3Val >= RegWidth)
- return Error(Op3->getStartLoc(),
+ return Error(Op3.getStartLoc(),
"expected integer in range [0, 31]");
if (Op4Val < 1 || Op4Val > RegWidth)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"expected integer in range [1, 32]");
uint64_t NewOp4Val = Op3Val + Op4Val - 1;
if (NewOp4Val >= RegWidth || NewOp4Val < Op3Val)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"requested extract overflows register");
const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext());
Operands[4] = AArch64Operand::CreateImm(
- NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext());
if (Tok == "bfxil")
Operands[0] = AArch64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
+ "bfm", false, Op.getStartLoc(), getContext());
else if (Tok == "sbfx")
Operands[0] = AArch64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
+ "sbfm", false, Op.getStartLoc(), getContext());
else if (Tok == "ubfx")
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
else
llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op4;
}
}
}
@@ -3626,63 +3740,58 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
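For the bitfield-extract aliases above, the mapping is immr = lsb and imms = lsb + width - 1, which is exactly the NewOp4Val computation and its overflow check. For instance:

    ubfx w0, w1, #4, #8 // rewritten to: ubfm w0, w1, #4, #11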
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
+ if (Op.isReg()) {
+ unsigned Reg = getXRegFromWReg(Op.getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
// FIXME: Likewise for sxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg() &&
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op->getReg())) {
+ Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
+ if (Op.isReg()) {
+ unsigned Reg = getXRegFromWReg(Op.getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
}
// FIXME: Likewise for uxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg() &&
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op->getReg())) {
+ Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR32. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg()) {
- unsigned Reg = getWRegFromXReg(Op->getReg());
- Operands[1] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg()) {
+ unsigned Reg = getWRegFromXReg(Op.getReg());
+ Operands[1] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
}
// Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR.
if (NumOperands == 3 && Tok == "fmov") {
- AArch64Operand *RegOp = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *ImmOp = static_cast<AArch64Operand *>(Operands[2]);
- if (RegOp->isReg() && ImmOp->isFPImm() &&
- ImmOp->getFPImm() == (unsigned)-1) {
+ AArch64Operand &RegOp = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]);
+ if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) {
unsigned zreg =
AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains(
- RegOp->getReg())
+ RegOp.getReg())
? AArch64::WZR : AArch64::XZR;
- Operands[2] = AArch64Operand::CreateReg(zreg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete ImmOp;
+ Operands[2] = AArch64Operand::CreateReg(zreg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
@@ -3735,14 +3844,14 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
// If the match failed on a suffix token operand, tweak the diagnostic
// accordingly.
- if (((AArch64Operand *)Operands[ErrorInfo])->isToken() &&
- ((AArch64Operand *)Operands[ErrorInfo])->isTokenSuffix())
+ if (((AArch64Operand &)*Operands[ErrorInfo]).isToken() &&
+ ((AArch64Operand &)*Operands[ErrorInfo]).isTokenSuffix())
MatchResult = Match_InvalidSuffix;
return showMatchError(ErrorLoc, MatchResult);
@@ -3794,9 +3903,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidLabel:
case Match_MSR:
case Match_MRS: {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
// Any time we get here, there's nothing fancy to do. Just get the
// operand SMLoc and display the diagnostic.
- SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ SMLoc ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
return showMatchError(ErrorLoc, MatchResult);
@@ -3819,6 +3930,10 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(8, Loc);
if (IDVal == ".tlsdesccall")
return parseDirectiveTLSDescCall(Loc);
+ if (IDVal == ".ltorg" || IDVal == ".pool")
+ return parseDirectiveLtorg(Loc);
+ if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
return parseDirectiveLOH(IDVal, Loc);
}
@@ -3920,6 +4035,66 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
return false;
}
+/// parseDirectiveLtorg
+/// ::= .ltorg | .pool
+bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
+ getTargetStreamer().emitCurrentConstantPool();
+ return false;
+}
+
+/// parseDirectiveReq
+/// ::= name .req registername
+bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ SMLoc SRegLoc = getLoc();
+ unsigned RegNum = tryParseRegister();
+ bool IsVector = false;
+
+ if (RegNum == static_cast<unsigned>(-1)) {
+ StringRef Kind;
+ RegNum = tryMatchVectorRegister(Kind, false);
+ if (!Kind.empty()) {
+ Error(SRegLoc, "vector register without type specifier expected");
+ return false;
+ }
+ IsVector = true;
+ }
+
+ if (RegNum == static_cast<unsigned>(-1)) {
+ Parser.eatToEndOfStatement();
+ Error(SRegLoc, "register name or alias expected");
+ return false;
+ }
+
+ // Shouldn't be anything else.
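The fmov handling a few hunks up substitutes the integer zero register when the immediate is exactly 0.0, since 0.0 is not encodable as an AArch64 8-bit FP immediate:

    fmov s0, #0.0 // rewritten to: fmov s0, wzr
    fmov d1, #0.0 // rewritten to: fmov d1, xzr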
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Error(Parser.getTok().getLoc(), "unexpected input in .req directive");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ auto pair = std::make_pair(IsVector, RegNum);
+ if (RegisterReqs.GetOrCreateValue(Name, pair).getValue() != pair)
+ Warning(L, "ignoring redefinition of register alias '" + Name + "'");
+
+ return true;
+}
+
+/// parseDirectiveUnreq
+/// ::= .unreq registername
+bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected input in .unreq directive.");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
bool AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
@@ -3986,9 +4161,9 @@ extern "C" void LLVMInitializeAArch64AsmParser() {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(AsmOp);
+ AArch64Operand &Op = static_cast<AArch64Operand &>(AsmOp);
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
// immediate in the syntax.
@@ -4036,9 +4211,9 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
ExpectedVal = 8;
break;
}
- if (!Op->isImm())
+ if (!Op.isImm())
return Match_InvalidOperand;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
if (!CE)
return Match_InvalidOperand;
if (CE->getValue() == ExpectedVal)
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 2466368..2057c51 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -37,8 +37,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
case LLVMDisassembler_VariantKind_ARM64_TLVP:
case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
default:
- assert(0 && "bad LLVMDisassembler_VariantKind");
- return MCSymbolRefExpr::VK_None;
+ llvm_unreachable("bad LLVMDisassembler_VariantKind");
}
}
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
index be4ccad..d64c05b 100644
--- a/lib/Target/AArch64/Disassembler/CMakeLists.txt
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -4,11 +4,5 @@ add_llvm_library(LLVMAArch64Disassembler
AArch64Disassembler.cpp
AArch64ExternalSymbolizer.cpp
)
-# workaround for hanging compilation on MSVC8, 9 and 10
-#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
-#set_property(
-# SOURCE ARMDisassembler.cpp
-# PROPERTY COMPILE_FLAGS "/Od"
-# )
-#endif()
+
add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index f484a5b..8a21f06 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -918,7 +918,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
else
O << getRegisterName(Reg);
} else
- assert(0 && "unknown operand kind in printPostIncOperand64");
+ llvm_unreachable("unknown operand kind in printPostIncOperand64");
}
void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
@@ -1109,7 +1109,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
while (Stride--) {
switch (Reg) {
default:
- assert(0 && "Vector register expected!");
+ llvm_unreachable("Vector register expected!");
case AArch64::Q0: Reg = AArch64::Q1; break;
case AArch64::Q1: Reg = AArch64::Q2; break;
case AArch64::Q2: Reg = AArch64::Q3; break;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index d8900d4..a917616 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -86,7 +86,7 @@ public:
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
default:
- assert(0 && "Unknown fixup kind!");
+ llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_tlsdesc_call:
return 0;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index dc4a8bf..1763b40 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -96,4 +96,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
ExceptionsType = ExceptionHandling::DwarfCFI;
UseIntegratedAssembler = true;
+
+ HasIdentDirective = true;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 464a18c..f051357 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -218,13 +218,9 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
const MCSubtargetInfo &STI) const {
if (MO.isReg())
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
- else {
- assert(MO.isImm() && "did not expect relocated expression");
- return static_cast<unsigned>(MO.getImm());
- }
- assert(0 && "Unable to encode MCOperand!");
- return 0;
+ assert(MO.isImm() && "did not expect relocated expression");
+ return static_cast<unsigned>(MO.getImm());
}
template<unsigned FixupKind> uint32_t
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 85c3ec7..42a6787 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -81,37 +81,8 @@ void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
OS << *Expr;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-// FIXME: really do above: now that two backends are using it.
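getNextVectorRegister above lets the printer step through the consecutive registers of a vector-list operand with an optional stride; the cases elided here continue through the remaining Q registers, and in the full function the numbering wraps from Q31 back to Q0, matching the architecture's modulo-32 register lists. Illustrative uses:

    ld1 { v4.4s, v5.4s, v6.4s }, [x0] // three consecutive registers, stride 1
    ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [x1] // a list may wrap past v31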
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
+void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
const MCSection *AArch64MCExpr::FindAssociatedSection() const {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index e869ed0..5422f9d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -147,7 +147,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 5c86189..ba95366 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -75,7 +75,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
Log2Size = llvm::Log2_32(4);
switch (Sym->getKind()) {
default:
- assert(0 && "Unexpected symbol reference variant kind!");
+ llvm_unreachable("Unexpected symbol reference variant kind!");
case MCSymbolRefExpr::VK_PAGEOFF:
RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12);
return true;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
new file mode 100644
index 0000000..f9aeb35
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -0,0 +1,40 @@
+//===-- AArch64TargetStreamer.cpp - AArch64TargetStreamer class -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64TargetStreamer class.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+//
+// AArch64TargetStreamer Implementation
+//
+AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
+
+AArch64TargetStreamer::~AArch64TargetStreamer() {}
+
+// The constant pool handling is shared by all AArch64TargetStreamer
+// implementations.
+const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr) {
+ return ConstantPools->addEntry(Streamer, Expr);
+}
+
+void AArch64TargetStreamer::emitCurrentConstantPool() {
+ ConstantPools->emitForCurrentSection(Streamer);
+}
+
+// finish() - write out any non-empty assembler constant pools.
+void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
diff --git a/lib/Target/AArch64/MCTargetDesc/Android.mk b/lib/Target/AArch64/MCTargetDesc/Android.mk
index e9d2323..a23c0e5 100644
--- a/lib/Target/AArch64/MCTargetDesc/Android.mk
+++ b/lib/Target/AArch64/MCTargetDesc/Android.mk
@@ -14,7 +14,8 @@ aarch64_mc_desc_SRC_FILES := \
AArch64MCAsmInfo.cpp \
AArch64MCCodeEmitter.cpp \
AArch64MCExpr.cpp \
- AArch64MCTargetDesc.cpp
+ AArch64MCTargetDesc.cpp \
+ AArch64TargetStreamer.cpp
# For the host
# =====================================================
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
index 7d5bced..6d8be5e 100644
--- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMAArch64Desc
AArch64MCExpr.cpp
AArch64MCTargetDesc.cpp
AArch64MachObjectWriter.cpp
+ AArch64TargetStreamer.cpp
)
add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 9e4c389..9d2ce21 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -233,23 +233,9 @@ inline static const char *getCondCodeName(CondCode Code) {
}
inline static CondCode getInvertedCondCode(CondCode Code) {
- switch (Code) {
- default: llvm_unreachable("Unknown condition code");
- case EQ: return NE;
- case NE: return EQ;
- case HS: return LO;
- case LO: return HS;
- case MI: return PL;
- case PL: return MI;
- case VS: return VC;
- case VC: return VS;
- case HI: return LS;
- case LS: return HI;
- case GE: return LT;
- case LT: return GE;
- case GT: return LE;
- case LE: return GT;
- }
+ // To invert a condition, it is only necessary to flip the low bit:
+
+ return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1);
}
/// Given a condition code, return NZCV flags that would satisfy that condition.
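The table-free getInvertedCondCode works because the AArch64 condition encodings pair each condition with its inverse at adjacent values (EQ/NE are 0b0000/0b0001, HS/LO 0b0010/0b0011, up through GT/LE and AL/NV), so XOR with 1 flips the sense. In branch terms, with illustrative labels:

    cmp x0, x1
    b.eq is_equal // condition field 0b0000
    b.ne not_equal // condition field 0b0001, i.e. EQ with the low bit flipped

This pairing is also why the asm-parser change earlier rejects inverting AL and NV: flipping the low bit merely swaps two encodings that both execute unconditionally.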