Diffstat (limited to 'lib/Target/AArch64')
36 files changed, 956 insertions, 647 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index dff48f9..bb3db4b 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -32,6 +32,9 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
 def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
   "Enable ARMv8 CRC-32 checksum instructions">;
 
+def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
+  "Enable ARMv8.1a extensions", [FeatureCRC]>;
+
 /// Cyclone has register move instructions which are "free".
 def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
                                         "Has zero-cycle register moves">;
@@ -89,6 +92,10 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
                                                FeatureNEON,
                                                FeatureCRC]>;
 
+def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a,
+                                                         FeatureNEON,
+                                                         FeatureCrypto]>;
+
 def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
 def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
 // FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index dd401c6..3bc5a54 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
 using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 2cf3c22..bffd9e6 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -142,7 +142,7 @@ private:
   int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
   void scanInstruction(MachineInstr *MI, unsigned Idx,
                        std::map<unsigned, Chain*> &Active,
-                       std::set<std::unique_ptr<Chain>> &AllChains);
+                       std::vector<std::unique_ptr<Chain>> &AllChains);
   void maybeKillChain(MachineOperand &MO, unsigned Idx,
                       std::map<unsigned, Chain*> &RegChains);
   Color getColor(unsigned Register);
@@ -287,12 +287,12 @@ public:
     raw_string_ostream OS(S);
     OS << "{";
-    StartInst->print(OS, NULL, true);
+    StartInst->print(OS, /* SkipOpers= */true);
     OS << " -> ";
-    LastInst->print(OS, NULL, true);
+    LastInst->print(OS, /* SkipOpers= */true);
     if (KillInst) {
       OS << " (kill @ ";
-      KillInst->print(OS, NULL, true);
+      KillInst->print(OS, /* SkipOpers= */true);
       OS << ")";
     }
     OS << "}";
@@ -307,6 +307,11 @@ public:
 //===----------------------------------------------------------------------===//
 
 bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
+  // Don't do anything if this isn't an A53 or A57.
+  if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
+        F.getSubtarget<AArch64Subtarget>().isCortexA57()))
+    return false;
+
   bool Changed = false;
   DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
@@ -331,7 +336,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
   // been killed yet. This is keyed by register - all chains can only have one
   // "link" register between each inst in the chain.
   std::map<unsigned, Chain*> ActiveChains;
-  std::set<std::unique_ptr<Chain>> AllChains;
+  std::vector<std::unique_ptr<Chain>> AllChains;
   unsigned Idx = 0;
   for (auto &MI : MBB)
     scanInstruction(&MI, Idx++, ActiveChains, AllChains);
@@ -598,10 +603,9 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
   return Changed;
 }
 
-void AArch64A57FPLoadBalancing::
-scanInstruction(MachineInstr *MI, unsigned Idx,
-                std::map<unsigned, Chain*> &ActiveChains,
-                std::set<std::unique_ptr<Chain>> &AllChains) {
+void AArch64A57FPLoadBalancing::scanInstruction(
+    MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
+    std::vector<std::unique_ptr<Chain>> &AllChains) {
 
   // Inspect "MI", updating ActiveChains and AllChains.
   if (isMul(MI)) {
 
@@ -620,7 +624,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
 
     auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
     ActiveChains[DestReg] = G.get();
-    AllChains.insert(std::move(G));
+    AllChains.push_back(std::move(G));
 
   } else if (isMla(MI)) {
 
@@ -664,7 +668,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
                  << TRI->getName(DestReg) << "\n");
     auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
     ActiveChains[DestReg] = G.get();
-    AllChains.insert(std::move(G));
+    AllChains.push_back(std::move(G));
 
   } else {
 
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 287989f..716e1a3 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index d64d851..1b4483a 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,12 +12,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "AArch64.h"
 #include "AArch64MCInstLower.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
@@ -34,8 +36,10 @@
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "asm-printer"
@@ -49,7 +53,7 @@ class AArch64AsmPrinter : public AsmPrinter {
 public:
   AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
       : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
-        SM(*this), AArch64FI(nullptr), LOHLabelCounter(0) {}
+        SM(*this), AArch64FI(nullptr) {}
 
   const char *getPassName() const override {
     return "AArch64 Assembly Printer";
@@ -110,7 +114,6 @@ private:
   typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
   MInstToMCSymbol LOHInstToLabel;
-  unsigned LOHLabelCounter;
 };
 
 } // end of anonymous namespace
@@ -219,6 +222,17 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
     O << '#' << Imm;
     break;
   }
+  case MachineOperand::MO_GlobalAddress: {
+    const GlobalValue *GV = MO.getGlobal();
+    MCSymbol *Sym = getSymbol(GV);
+
+    // FIXME: Can we get anything other than a plain symbol here?
+    assert(!MO.getTargetFlags() && "Unknown operand target flag!");
+
+    O << *Sym;
+    printOffset(MO.getOffset(), O);
+    break;
+  }
   }
 }
@@ -450,7 +464,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   if (AArch64FI->getLOHRelated().count(MI)) {
     // Generate a label for LOH related instruction
-    MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
+    MCSymbol *LOHLabel = createTempSymbol("loh");
     // Associate the instruction with the label
     LOHInstToLabel[MI] = LOHLabel;
     OutStreamer.EmitLabel(LOHLabel);
@@ -489,24 +503,57 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     EmitToStreamer(OutStreamer, TmpInst);
     return;
   }
-  case AArch64::TLSDESC_BLR: {
-    MCOperand Callee, Sym;
-    MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
-    MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
-
-    // First emit a relocation-annotation. This expands to no code, but requests
+  case AArch64::TLSDESC_CALLSEQ: {
+    /// lower this to:
+    ///    adrp x0, :tlsdesc:var
+    ///    ldr x1, [x0, #:tlsdesc_lo12:var]
+    ///    add x0, x0, #:tlsdesc_lo12:var
+    ///    .tlsdesccall var
+    ///    blr x1
+    ///    (TPIDR_EL0 offset now in x0)
+    const MachineOperand &MO_Sym = MI->getOperand(0);
+    MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+    MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
+    MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
+                                   AArch64II::MO_NC);
+    MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+    MCInstLowering.lowerOperand(MO_Sym, Sym);
+    MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+    MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);
+
+    MCInst Adrp;
+    Adrp.setOpcode(AArch64::ADRP);
+    Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Adrp.addOperand(SymTLSDesc);
+    EmitToStreamer(OutStreamer, Adrp);
+
+    MCInst Ldr;
+    Ldr.setOpcode(AArch64::LDRXui);
+    Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
+    Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Ldr.addOperand(SymTLSDescLo12);
+    Ldr.addOperand(MCOperand::CreateImm(0));
+    EmitToStreamer(OutStreamer, Ldr);
+
+    MCInst Add;
+    Add.setOpcode(AArch64::ADDXri);
+    Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+    Add.addOperand(SymTLSDescLo12);
+    Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
+    EmitToStreamer(OutStreamer, Add);
+
+    // Emit a relocation-annotation. This expands to no code, but requests
     // the following instruction gets an R_AARCH64_TLSDESC_CALL.
     MCInst TLSDescCall;
     TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
     TLSDescCall.addOperand(Sym);
     EmitToStreamer(OutStreamer, TLSDescCall);
 
-    // Other than that it's just a normal indirect call to the function loaded
-    // from the descriptor.
-    MCInst BLR;
-    BLR.setOpcode(AArch64::BLR);
-    BLR.addOperand(Callee);
-    EmitToStreamer(OutStreamer, BLR);
+    MCInst Blr;
+    Blr.setOpcode(AArch64::BLR);
+    Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
+    EmitToStreamer(OutStreamer, Blr);
 
     return;
   }
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 3b74481..06ff9af 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
     for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
          I != E; ++I) {
       switch (I->getOpcode()) {
-      case AArch64::TLSDESC_BLR:
+      case AArch64::TLSDESC_CALLSEQ:
         // Make sure it's a local dynamic access.
-        if (!I->getOperand(1).isSymbol() ||
-            strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+        if (!I->getOperand(0).isSymbol() ||
+            strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
           break;
 
         if (TLSBaseAddrReg)
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 938dcb3..568f258 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -279,7 +279,7 @@ static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
 /// definition. It also consider definitions of ADRP instructions as uses and
 /// ignore other uses. The ADRPMode is used to collect the information for LHO
 /// that involve ADRP operation only.
-static void initReachingDef(MachineFunction &MF,
+static void initReachingDef(const MachineFunction &MF,
                             InstrToInstrs *ColorOpToReachedUses,
                             BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
                             BlockToSetOfInstrsPerColor &ReachableUses,
@@ -288,7 +288,7 @@ static void initReachingDef(MachineFunction &MF,
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   unsigned NbReg = RegToId.size();
 
-  for (MachineBasicBlock &MBB : MF) {
+  for (const MachineBasicBlock &MBB : MF) {
     auto &BBGen = Gen[&MBB];
     BBGen = make_unique<const MachineInstr *[]>(NbReg);
     std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
@@ -382,7 +382,7 @@ static void initReachingDef(MachineFunction &MF,
 ///             op.reachedUses
 ///
 /// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(MachineFunction &MF,
+static void reachingDefAlgorithm(const MachineFunction &MF,
                                  InstrToInstrs *ColorOpToReachedUses,
                                  BlockToSetOfInstrsPerColor &In,
                                  BlockToSetOfInstrsPerColor &Out,
@@ -392,7 +392,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
   bool HasChanged;
   do {
     HasChanged = false;
-    for (MachineBasicBlock &MBB : MF) {
+    for (const MachineBasicBlock &MBB : MF) {
       unsigned CurReg;
       for (CurReg = 0; CurReg < NbReg; ++CurReg) {
         SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
@@ -401,7 +401,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
         SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
         unsigned Size = BBOutSet.size();
         // In[bb][color] = U Out[bb.predecessors][color]
-        for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+        for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
          SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
          BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
         }
@@ -433,7 +433,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
 /// @p DummyOp.
 /// \pre ColorOpToReachedUses is an array of at least number of registers of
 /// InstrToInstrs.
-static void reachingDef(MachineFunction &MF,
+static void reachingDef(const MachineFunction &MF,
                         InstrToInstrs *ColorOpToReachedUses,
                         const MapRegToId &RegToId, bool ADRPMode = false,
                         const MachineInstr *DummyOp = nullptr) {
@@ -983,7 +983,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
 /// Look for every register defined by potential LOHs candidates.
 /// Map these registers with dense id in @p RegToId and vice-versa in
 /// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
+static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
                                MapIdToReg &IdToReg,
                                const TargetRegisterInfo *TRI) {
   unsigned CurRegId = 0;
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 61017c1..99cb641 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3158,7 +3158,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
   // Add a register mask with the call-preserved registers.
   // Proper defs for return values will be added by setPhysRegsDeadExcept().
-  MIB.addRegMask(TRI.getCallPreservedMask(CC));
+  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
 
   CLI.Call = MIB;
 
@@ -4563,7 +4563,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
     unsigned ResultReg = 0;
     uint64_t ShiftVal = C->getZExtValue();
     MVT SrcVT = RetVT;
-    bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
+    bool IsZExt = I->getOpcode() != Instruction::AShr;
     const Value *Op0 = I->getOperand(0);
     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
       if (!isIntExtFree(ZExt)) {
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ac11c4d..0a47dcb 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -65,7 +65,7 @@ public:
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                    char ConstraintCode,
+                                    unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
 
   SDNode *SelectMLAV64LaneV128(SDNode *N);
@@ -211,13 +211,20 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
 }
 
 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
-    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
-  assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
-  // Require the address to be in a register. That is safe for all AArch64
-  // variants and it is hard to do anything much smarter without knowing
-  // how the operand is used.
-  OutOps.push_back(Op);
-  return false;
+    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+  switch(ConstraintID) {
+  default:
+    llvm_unreachable("Unexpected asm memory constraint");
+  case InlineAsm::Constraint_i:
+  case InlineAsm::Constraint_m:
+  case InlineAsm::Constraint_Q:
+    // Require the address to be in a register. That is safe for all AArch64
+    // variants and it is hard to do anything much smarter without knowing
+    // how the operand is used.
+    OutOps.push_back(Op);
+    return false;
+  }
+  return true;
 }
 
 /// SelectArithImmed - Select an immediate value that can be represented as
@@ -299,7 +306,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
   }
 }
 
-/// \brief Determine wether it is worth to fold V into an extended register.
+/// \brief Determine whether it is worth to fold V into an extended register.
 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
   // it hurts if the value is used at least twice, unless we are optimizing
   // for code size.
@@ -1055,7 +1062,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
   SDValue Ops[] = {N->getOperand(2), // Mem operand;
                    Chain};
 
-  EVT ResTys[] = {MVT::Untyped, MVT::Other};
+  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
 
   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   SDValue SuperReg = SDValue(Ld, 0);
@@ -1077,8 +1084,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                    N->getOperand(2), // Incremental
                    Chain};
 
-  EVT ResTys[] = {MVT::i64, // Type of the write back register
-                  MVT::Untyped, MVT::Other};
+  const EVT ResTys[] = {MVT::i64, // Type of the write back register
+                        MVT::Untyped, MVT::Other};
 
   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 
@@ -1119,8 +1126,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
   SDLoc dl(N);
   EVT VT = N->getOperand(2)->getValueType(0);
-  EVT ResTys[] = {MVT::i64, // Type of the write back register
-                  MVT::Other}; // Type for the Chain
+  const EVT ResTys[] = {MVT::i64, // Type of the write back register
+                        MVT::Other}; // Type for the Chain
 
   // Form a REG_SEQUENCE to force register allocation.
   bool Is128Bit = VT.getSizeInBits() == 128;
@@ -1136,6 +1143,7 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
   return St;
 }
 
+namespace {
 /// WidenVector - Given a value in the V64 register class, produce the
 /// equivalent value in the V128 register class.
 class WidenVector {
@@ -1156,6 +1164,7 @@ public:
     return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
   }
 };
+} // namespace
 
 /// NarrowVector - Given a value in the V128 register class, produce the
 /// equivalent value in the V64 register class.
@@ -1184,7 +1193,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
 
   SDValue RegSeq = createQTuple(Regs);
 
-  EVT ResTys[] = {MVT::Untyped, MVT::Other};
+  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
 
   unsigned LaneNo =
       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
@@ -1224,8 +1233,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
 
   SDValue RegSeq = createQTuple(Regs);
 
-  EVT ResTys[] = {MVT::i64, // Type of the write back register
-                  MVT::Untyped, MVT::Other};
+  const EVT ResTys[] = {MVT::i64, // Type of the write back register
+                        MVT::Untyped, MVT::Other};
 
   unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
@@ -1309,8 +1318,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
 
   SDValue RegSeq = createQTuple(Regs);
 
-  EVT ResTys[] = {MVT::i64, // Type of the write back register
-                  MVT::Other};
+  const EVT ResTys[] = {MVT::i64, // Type of the write back register
+                        MVT::Other};
 
   unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index a1b324e..0c0e856 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
 
 static cl::opt<bool>
 EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
-                         cl::desc("Allow AArch64 SLI/SRI formation"),
-                         cl::init(false));
+                           cl::desc("Allow AArch64 SLI/SRI formation"),
+                           cl::init(false));
+
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+    "aarch64-elf-ldtls-generation", cl::Hidden,
+    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+    cl::init(false));
 
 AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                                              const AArch64Subtarget &STI)
@@ -362,9 +370,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
 
     // AArch64 has implementations of a lot of rounding-like FP operations.
-    static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
-    for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
-      MVT Ty = RoundingTypes[I];
+    for (MVT Ty : {MVT::f32, MVT::f64}) {
       setOperationAction(ISD::FFLOOR, Ty, Legal);
       setOperationAction(ISD::FNEARBYINT, Ty, Legal);
       setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -561,9 +567,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   }
 
   // AArch64 has implementations of a lot of rounding-like FP operations.
-  static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 };
-  for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
-    MVT Ty = RoundingVecTypes[I];
+  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
    setOperationAction(ISD::FFLOOR, Ty, Legal);
    setOperationAction(ISD::FNEARBYINT, Ty, Legal);
    setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -752,7 +756,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
   case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
   case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
-  case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+  case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
   case AArch64ISD::ADC: return "AArch64ISD::ADC";
   case AArch64ISD::SBC: return "AArch64ISD::SBC";
   case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -811,6 +815,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
   case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
   case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
+  case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
+  case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
+  case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
+  case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
+  case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
+  case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
   case AArch64ISD::NOT: return "AArch64ISD::NOT";
   case AArch64ISD::BIT: return "AArch64ISD::BIT";
   case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
@@ -1247,7 +1257,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
   case ISD::SMULO:
   case ISD::UMULO: {
     CC = AArch64CC::NE;
-    bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false;
+    bool IsSigned = Op.getOpcode() == ISD::SMULO;
     if (Op.getValueType() == MVT::i32) {
       unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
       // For a 32 bit multiply with overflow check we want the instruction
@@ -2784,13 +2794,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
   if (IsThisReturn) {
     // For 'this' returns, use the X0-preserving mask if applicable
-    Mask = TRI->getThisReturnPreservedMask(CallConv);
+    Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
     if (!Mask) {
       IsThisReturn = false;
-      Mask = TRI->getCallPreservedMask(CallConv);
+      Mask = TRI->getCallPreservedMask(MF, CallConv);
     }
   } else
-    Mask = TRI->getCallPreservedMask(CallConv);
+    Mask = TRI->getCallPreservedMask(MF, CallConv);
 
   assert(Mask && "Missing call preserved mask for calling convention");
   Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3027,58 +3037,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
 /// When accessing thread-local variables under either the general-dynamic or
 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
-///
-/// Thus, the ideal call sequence on AArch64 is:
-///
-///     adrp x0, :tlsdesc:thread_var
-///     ldr x8, [x0, :tlsdesc_lo12:thread_var]
-///     add x0, x0, :tlsdesc_lo12:thread_var
-///     .tlsdesccall thread_var
-///     blr x8
-///     (TPIDR_EL0 offset now in x0).
+/// is a function pointer to carry out the resolution.
 ///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
+/// The sequence is:
+///    adrp x0, :tlsdesc:var
+///    ldr x1, [x0, #:tlsdesc_lo12:var]
+///    add x0, x0, #:tlsdesc_lo12:var
+///    .tlsdesccall var
+///    blr x1
+///    (TPIDR_EL0 offset now in x0)
 ///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
-                                                   SDValue DescAddr, SDLoc DL,
-                                                   SelectionDAG &DAG) const {
+/// The above sequence must be produced unscheduled, to enable the linker to
+/// optimize/relax this sequence.
+/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+/// above sequence, and expanded really late in the compilation flow, to ensure
+/// the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+                                                      SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy();
 
-  // The function we need to call is simply the first entry in the GOT for this
-  // descriptor, load it in preparation.
-  SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
-
-  // TLS calls preserve all registers except those that absolutely must be
-  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
-  // silly).
-  const uint32_t *Mask =
-      Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
-
-  // The function takes only one argument: the address of the descriptor itself
-  // in X0.
-  SDValue Glue, Chain;
-  Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
-  Glue = Chain.getValue(1);
+  SDValue Chain = DAG.getEntryNode();
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
 
-  // We're now ready to populate the argument list, as with a normal call:
-  SmallVector<SDValue, 6> Ops;
+  SmallVector<SDValue, 2> Ops;
   Ops.push_back(Chain);
-  Ops.push_back(Func);
   Ops.push_back(SymAddr);
-  Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
-  Ops.push_back(DAG.getRegisterMask(Mask));
-  Ops.push_back(Glue);
 
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-  Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
-  Glue = Chain.getValue(1);
+  Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+  SDValue Glue = Chain.getValue(1);
 
   return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
 }
@@ -3089,9 +3075,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
   assert(Subtarget->isTargetELF() && "This function expects an ELF target");
   assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
          "ELF TLS only supported in small memory model");
+  // Different choices can be made for the maximum size of the TLS area for a
+  // module. For the small address model, the default TLS size is 16MiB and the
+  // maximum TLS size is 4GiB.
+  // FIXME: add -mtls-size command line option and make it control the 16MiB
+  // vs. 4GiB code sequence generation.
   const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
 
   TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
 
+  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+    if (Model == TLSModel::LocalDynamic)
+      Model = TLSModel::GeneralDynamic;
+  }
+
   SDValue TPOff;
   EVT PtrVT = getPointerTy();
@@ -3102,17 +3097,20 @@
   if (Model == TLSModel::LocalExec) {
     SDValue HiVar = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
     SDValue LoVar = DAG.getTargetGlobalAddress(
         GV, DL, PtrVT, 0,
-        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 
-    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
-                                       DAG.getTargetConstant(16, MVT::i32)),
-                    0);
-    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
-                                       DAG.getTargetConstant(0, MVT::i32)),
-                    0);
+    SDValue TPWithOff_lo =
+        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+                                   HiVar, DAG.getTargetConstant(0, MVT::i32)),
+                0);
+    SDValue TPWithOff =
+        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+                                   LoVar, DAG.getTargetConstant(0, MVT::i32)),
+                0);
+    return TPWithOff;
   } else if (Model == TLSModel::InitialExec) {
     TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
     TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3127,19 +3125,6 @@
         DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
     MFI->incNumLocalDynamicTLSAccesses();
 
-    // Accesses used in this sequence go via the TLS descriptor which lives in
-    // the GOT. Prepare an address we can use to handle this.
-    SDValue HiDesc = DAG.getTargetExternalSymbol(
-        "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
-    SDValue LoDesc = DAG.getTargetExternalSymbol(
-        "_TLS_MODULE_BASE_", PtrVT,
-        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
-    // First argument to the descriptor call is the address of the descriptor
-    // itself.
-    SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
-    DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
     // The call needs a relocation too for linker relaxation. It doesn't make
     // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
     // the address.
@@ -3148,40 +3133,23 @@
 
     // Now we can calculate the offset from TPIDR_EL0 to this module's
     // thread-local area.
-    TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
 
     // Now use :dtprel_whatever: operations to calculate this variable's offset
     // in its thread-storage area.
     SDValue HiVar = DAG.getTargetGlobalAddress(
-        GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+        GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
     SDValue LoVar = DAG.getTargetGlobalAddress(
         GV, DL, MVT::i64, 0,
-        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
-
-    SDValue DTPOff =
-        SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
-                                   DAG.getTargetConstant(16, MVT::i32)),
-                0);
-    DTPOff =
-        SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
-                                   DAG.getTargetConstant(0, MVT::i32)),
-                0);
-
-    TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
-  } else if (Model == TLSModel::GeneralDynamic) {
-    // Accesses used in this sequence go via the TLS descriptor which lives in
-    // the GOT. Prepare an address we can use to handle this.
-    SDValue HiDesc = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
-    SDValue LoDesc = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
-    // First argument to the descriptor call is the address of the descriptor
-    // itself.
-    SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
-    DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
+    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+                                       DAG.getTargetConstant(0, MVT::i32)),
+                    0);
+    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+                                       DAG.getTargetConstant(0, MVT::i32)),
+                    0);
+  } else if (Model == TLSModel::GeneralDynamic) {
     // The call needs a relocation too for linker relaxation. It doesn't make
     // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
     // the address.
@@ -3189,7 +3157,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
         DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
 
     // Finally we can make a call to calculate the offset from tpidr_el0.
-    TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
   } else
     llvm_unreachable("Unsupported ELF TLS access model");
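
Note: with the change above, the local-exec (and local-dynamic :dtprel:) offsets are no longer built with MOVZ/MOVK; they are added as a hi12/lo12 relocation pair directly onto the thread pointer, which is what limits the default TLS area to 16MiB (12+12 bits of offset). A minimal sketch of the expected local-exec assembly for a hypothetical variable var (illustrative, not taken from this patch):

    mrs  x0, TPIDR_EL0               // read the thread pointer
    add  x0, x0, #:tprel_hi12:var    // bits [23:12] of the TP-relative offset
    add  x0, x0, #:tprel_lo12_nc:var // bits [11:0], no overflow check
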
@@ -3356,11 +3324,12 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
 
   EVT VecVT;
   EVT EltVT;
-  SDValue EltMask, VecVal1, VecVal2;
+  uint64_t EltMask;
+  SDValue VecVal1, VecVal2;
   if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
     EltVT = MVT::i32;
     VecVT = MVT::v4i32;
-    EltMask = DAG.getConstant(0x80000000ULL, EltVT);
+    EltMask = 0x80000000ULL;
 
     if (!VT.isVector()) {
       VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
@@ -3378,7 +3347,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
     // We want to materialize a mask with the the high bit set, but the AdvSIMD
     // immediate moves cannot materialize that in a single instruction for
     // 64-bit elements. Instead, materialize zero and then negate it.
-    EltMask = DAG.getConstant(0, EltVT);
+    EltMask = 0;
 
     if (!VT.isVector()) {
       VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
@@ -3393,11 +3362,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
     llvm_unreachable("Invalid type for copysign!");
   }
 
-  std::vector<SDValue> BuildVectorOps;
-  for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
-    BuildVectorOps.push_back(EltMask);
-
-  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
+  SDValue BuildVec = DAG.getConstant(EltMask, VecVT);
 
   // If we couldn't materialize the mask above, then the mask vector will be
   // the zero vector, and we need to negate it here.
@@ -5927,8 +5892,10 @@ FailedModImm:
   if (VT.getVectorElementType().isFloatingPoint()) {
     SmallVector<SDValue, 8> Ops;
-    MVT NewType =
-        (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+    EVT EltTy = VT.getVectorElementType();
+    assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
+           "Unsupported floating-point vector type");
+    MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
     for (unsigned i = 0; i < NumElts; ++i)
       Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
@@ -6781,7 +6748,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
   unsigned LZ = countLeadingZeros((uint64_t)Val);
   unsigned Shift = (63 - LZ) / 16;
   // MOVZ is free so return true for one or fewer MOVK.
-  return (Shift < 3) ? true : false;
+  return Shift < 3;
 }
 
 // Generate SUBS and CSEL for integer abs.
@@ -6898,6 +6865,15 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
                            N->getOperand(0));
       }
     } else {
+      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+      APInt VNP1 = -Value + 1;
+      if (VNP1.isPowerOf2()) {
+        SDValue ShiftedVal =
+            DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+                        DAG.getConstant(VNP1.logBase2(), MVT::i64));
+        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+                           ShiftedVal);
+      }
       // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
       APInt VNM1 = -Value - 1;
       if (VNM1.isPowerOf2()) {
@@ -6908,15 +6884,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
             DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
         return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
       }
-      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
-      APInt VNP1 = -Value + 1;
-      if (VNP1.isPowerOf2()) {
-        SDValue ShiftedVal =
-            DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
-                        DAG.getConstant(VNP1.logBase2(), MVT::i64));
-        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
-                           ShiftedVal);
-      }
     }
   }
   return SDValue();
 }
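
Note: to see the reordered (mul x, -(2^N - 1)) combine above at work, take x * -7: Value = -7, so VNP1 = -Value + 1 = 8 = 2^3, and the DAG becomes (sub x, (shl x, 3)), i.e. x - 8x = -7x. A sketch of the resulting code for a hypothetical i64 multiply by -7 (illustrative only):

    lsl  x8, x0, #3   // x8 = x << 3 = 8*x
    sub  x0, x0, x8   // x0 = x - 8*x = -7*x
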
@@ -7211,21 +7178,54 @@ static SDValue performBitcastCombine(SDNode *N,
 static SDValue performConcatVectorsCombine(SDNode *N,
                                            TargetLowering::DAGCombinerInfo &DCI,
                                            SelectionDAG &DAG) {
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+  // Optimize concat_vectors of truncated vectors, where the intermediate
+  // type is illegal, to avoid said illegality, e.g.,
+  //   (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
+  //                          (v2i16 (truncate (v2i64)))))
+  // ->
+  //   (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
+  //                                    (v4i32 (bitcast (v2i64))),
+  //                                    <0, 2, 4, 6>)))
+  // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
+  // on both input and result type, so we might generate worse code.
+  // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
+  if (N->getNumOperands() == 2 &&
+      N0->getOpcode() == ISD::TRUNCATE &&
+      N1->getOpcode() == ISD::TRUNCATE) {
+    SDValue N00 = N0->getOperand(0);
+    SDValue N10 = N1->getOperand(0);
+    EVT N00VT = N00.getValueType();
+
+    if (N00VT == N10.getValueType() &&
+        (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
+        N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
+      MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
+      SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
+      for (size_t i = 0; i < Mask.size(); ++i)
+        Mask[i] = i * 2;
+      return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                         DAG.getVectorShuffle(
+                             MidVT, dl,
+                             DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
+                             DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
+    }
+  }
+
   // Wait 'til after everything is legalized to try this. That way we have
   // legal vector types and such.
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  SDLoc dl(N);
-  EVT VT = N->getValueType(0);
-
   // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
   // splat. The indexed instructions are going to be expecting a DUPLANE64, so
   // canonicalise to that.
-  if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
+  if (N0 == N1 && VT.getVectorNumElements() == 2) {
     assert(VT.getVectorElementType().getSizeInBits() == 64);
-    return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT,
-                       WidenVector(N->getOperand(0), DAG),
+    return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
                        DAG.getConstant(0, MVT::i64));
   }
 
@@ -7238,10 +7238,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   // becomes
   //    (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
 
-  SDValue Op1 = N->getOperand(1);
-  if (Op1->getOpcode() != ISD::BITCAST)
+  if (N1->getOpcode() != ISD::BITCAST)
     return SDValue();
-  SDValue RHS = Op1->getOperand(0);
+  SDValue RHS = N1->getOperand(0);
   MVT RHSTy = RHS.getValueType().getSimpleVT();
   // If the RHS is not a vector, this is not the pattern we're looking for.
   if (!RHSTy.isVector())
@@ -7251,10 +7250,10 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
                                   RHSTy.getVectorNumElements() * 2);
-  return DAG.getNode(
-      ISD::BITCAST, dl, VT,
-      DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
-                  DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
+  return DAG.getNode(ISD::BITCAST, dl, VT,
+                     DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+                                 DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
+                                 RHS));
 }
 
 static SDValue tryCombineFixedPointConvert(SDNode *N,
@@ -7651,6 +7650,15 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
                      N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
 }
 
+static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
+                                           SelectionDAG &DAG) {
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
+                     DAG.getNode(Opc, SDLoc(N),
+                                 N->getOperand(1).getSimpleValueType(),
+                                 N->getOperand(1)),
+                     DAG.getConstant(0, MVT::i64));
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -7663,6 +7671,18 @@ static SDValue performIntrinsicCombine(SDNode *N,
   case Intrinsic::aarch64_neon_vcvtfxu2fp:
     return tryCombineFixedPointConvert(N, DCI, DAG);
     break;
+  case Intrinsic::aarch64_neon_saddv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
+  case Intrinsic::aarch64_neon_uaddv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
+  case Intrinsic::aarch64_neon_sminv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
+  case Intrinsic::aarch64_neon_uminv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
+  case Intrinsic::aarch64_neon_smaxv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
+  case Intrinsic::aarch64_neon_umaxv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
   case Intrinsic::aarch64_neon_fmax:
     return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2));
@@ -8792,9 +8812,11 @@ bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
 }
 
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= 128;
+  return Size <= 128 ? AtomicRMWExpansionKind::LLSC
+                     : AtomicRMWExpansionKind::None;
 }
 
 bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
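
Note: returning AtomicRMWExpansionKind::LLSC above keeps the existing behaviour of expanding atomicrmw of up to 128 bits into a load-linked/store-conditional loop in IR. For a 64-bit atomicrmw add, the loop comes out roughly as follows (a sketch, not copied from this patch's tests):

    .Lretry:
      ldxr  x8, [x0]        // load exclusive of the current value
      add   x8, x8, x1      // apply the read-modify-write operation
      stxr  w9, x8, [x0]    // store exclusive; w9 == 0 on success
      cbnz  w9, .Lretry     // retry if the exclusive reservation was lost
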
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 64cec55..8e0af2d 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -31,7 +31,7 @@ using namespace llvm; AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), - RI(this, &STI), Subtarget(STI) {} + RI(STI.getTargetTriple()), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. @@ -2068,10 +2068,10 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, .setMIFlag(Flag); } -MachineInstr * -AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { +MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const { // This is a bit of a hack. Consider this instruction: // // %vreg0<def> = COPY %SP; GPR64all:%vreg0 diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index d8f1274..fa4b8b7 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -129,10 +129,9 @@ public: const TargetRegisterInfo *TRI) const override; using TargetInstrInfo::foldMemoryOperandImpl; - MachineInstr * - foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const override; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 6e4c0b0..ec6fa5c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -22,6 +22,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, + AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsCyclone : Predicate<"Subtarget->isCyclone()">; @@ -96,6 +98,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; + +// Generates the general dynamic sequences, i.e. 
+// adrp x0, :tlsdesc:var +// ldr x1, [x0, #:tlsdesc_lo12:var] +// add x0, x0, #:tlsdesc_lo12:var +// .tlsdesccall var +// blr x1 + +// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here) +// number of operands (the variable) +def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, + [SDTCisPtrTy<0>]>; + def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, @@ -229,10 +244,11 @@ def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; -def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL", - SDT_AArch64TLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; +def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", + SDT_AArch64TLSDescCallSeq, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; + def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", SDT_AArch64WrapperLarge>; @@ -244,6 +260,13 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>; def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>; +def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; +def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; +def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; +def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; +def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; +def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -1049,15 +1072,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { let AsmString = ".tlsdesccall $sym"; } -// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It -// gets expanded to two MCInsts during lowering. -let isCall = 1, Defs = [LR] in -def TLSDESC_BLR - : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), - [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; +// FIXME: maybe the scratch register used shouldn't be fixed to X1? +// FIXME: can "hasSideEffects be dropped? +let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, + isCodeGenOnly = 1 in +def TLSDESC_CALLSEQ + : Pseudo<(outs), (ins i64imm:$sym), + [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>; +def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), + (TLSDESC_CALLSEQ texternalsym:$sym)>; -def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym), - (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. 
//===----------------------------------------------------------------------===// @@ -2326,8 +2350,15 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; defm FMOV : UnscaledConversion<"fmov">; -def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>; -def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>; +// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable +let isReMaterializable = 1, isCodeGenOnly = 1 in { +def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, + PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>, + Requires<[NoZCZ]>; +def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, + PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>, + Requires<[NoZCZ]>; +} //===----------------------------------------------------------------------===// // Floating point conversion instruction. @@ -3416,10 +3447,10 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; -def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; -def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; +def : Pat<(v2i64 (AArch64saddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; +def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), (FADDPv2i32p V64:$Rn)>; def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), @@ -3709,10 +3740,6 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>; defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>; defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; -defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>; -defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>; -defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>; -defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>; // Floating point vector extractions are codegen'd as either a sequence of @@ -3776,121 +3803,143 @@ defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; -multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> { -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it - def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (SMOVvi8to32 +// Patterns for across-vector intrinsics, that have a node equivalent, that +// returns a vector (with only the low lane defined) instead of a scalar. +// In effect, opNode is the same as (scalar_to_vector (IntNode)). +multiclass SIMDAcrossLanesIntrinsic<string baseOpc, + SDPatternOperator opNode> { +// If a lane instruction caught the vector_extract around opNode, we can +// directly match the latter to the instruction. 
+def : Pat<(v8i8 (opNode V64:$Rn)), + (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>; +def : Pat<(v16i8 (opNode V128:$Rn)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>; +def : Pat<(v4i16 (opNode V64:$Rn)), + (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>; +def : Pat<(v8i16 (opNode V128:$Rn)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>; +def : Pat<(v4i32 (opNode V128:$Rn)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>; + + +// If none did, fallback to the explicit patterns, consuming the vector_extract. +def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), + (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (insert_subvector undef, + (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), + ssub), ssub)>; + +} + +multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, + SDPatternOperator opNode> + : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { // If there is a sign extension after this intrinsic, consume it as smov already // performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)), +def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)), (i32 (SMOVvi16to32 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), (i64 0)))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), + (i64 0)))>; } -multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> { -// If there is a masking operation keeping only what has been actually -// generated, consume it. - def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; +multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, + SDPatternOperator opNode> + : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { // If there is a masking operation keeping only what has been actually // generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), +def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + ssub))>; +def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), + maski8_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), ssub))>; - -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), +def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. 
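On the signed patterns just above: SMOV sign-extends as part of the lane move, so when the reduction result is immediately sign-extended, the sext_inreg folds into the same instruction instead of costing a separate SXTB/SXTH. The unsigned multiclass plays the matching trick with the and mask, since reading the result through ssub already leaves zeros in the high bits. A small example of source that exposes the fold; the codegen noted in the comment is an assumption, not taken from this patch:

    #include <arm_neon.h>
    #include <cstdint>

    // vaddv_s8 returns int8_t; widening it to a 32-bit int is the
    // sext_inreg that the patterns above fold into SMOV.
    std::int32_t reduce_extend(int8x8_t v) {
      return vaddv_s8(v); // expect: addv b0, v0.8b ; smov w0, v0.b[0]
    }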
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), +def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), + maski16_or_more)), (i32 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), ssub))>; +} -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; +defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; -} +defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; +def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), + (SMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; +def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), + (SMINPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; +def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), + (UMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; +def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), + (UMINPv2i32 V64:$Rn, V64:$Rn)>; multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { def : Pat<(i32 (intOp (v8i8 V64:$Rn))), @@ -3953,32 +4002,6 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))), dsub))>; } -defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>; -def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>; -def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>; -def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>; -def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 8463ce6..b1499e2 100644 --- 
a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -63,16 +63,24 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // If a matching instruction is found, MergeForward is set to true if the // merge is to remove the first instruction and replace the second with // a pair-wise insn, and false if the reverse is true. + // \p SExtIdx[out] gives the index of the result of the load pair that + // must be extended. The value of SExtIdx assumes that the paired load + // produces the value in this order: (I, returned iterator), i.e., + // -1 means no value has to be extended, 0 means I, and 1 means the + // returned iterator. MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, + bool &MergeForward, int &SExtIdx, unsigned Limit); // Merge the two instructions indicated into a single pair-wise instruction. // If MergeForward is true, erase the first instruction and fold its // operation into the second. If false, the reverse. Return the instruction // following the first instruction (which may change during processing). + // \p SExtIdx index of the result that must be extended for a paired load. + // -1 means none, 0 means I, and 1 means Paired. MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, bool MergeForward); + MachineBasicBlock::iterator Paired, bool MergeForward, + int SExtIdx); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using @@ -181,6 +189,43 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { } } +static unsigned getMatchingNonSExtOpcode(unsigned Opc, + bool *IsValidLdStrOpc = nullptr) { + if (IsValidLdStrOpc) + *IsValidLdStrOpc = true; + switch (Opc) { + default: + if (IsValidLdStrOpc) + *IsValidLdStrOpc = false; + return UINT_MAX; + case AArch64::STRDui: + case AArch64::STURDi: + case AArch64::STRQui: + case AArch64::STURQi: + case AArch64::STRWui: + case AArch64::STURWi: + case AArch64::STRXui: + case AArch64::STURXi: + case AArch64::LDRDui: + case AArch64::LDURDi: + case AArch64::LDRQui: + case AArch64::LDURQi: + case AArch64::LDRWui: + case AArch64::LDURWi: + case AArch64::LDRXui: + case AArch64::LDURXi: + case AArch64::STRSui: + case AArch64::STURSi: + case AArch64::LDRSui: + case AArch64::LDURSi: + return Opc; + case AArch64::LDRSWui: + return AArch64::LDRWui; + case AArch64::LDURSWi: + return AArch64::LDURWi; + } +} + static unsigned getMatchingPairOpcode(unsigned Opc) { switch (Opc) { default: @@ -282,7 +327,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, - bool MergeForward) { + bool MergeForward, int SExtIdx) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -292,11 +337,13 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, if (NextI == Paired) ++NextI; - bool IsUnscaled = isUnscaledLdst(I->getOpcode()); + unsigned Opc = + SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); + bool IsUnscaled = isUnscaledLdst(Opc); int OffsetStride = IsUnscaled && EnableAArch64UnscaledMemOp ? 
getMemSize(I) : 1; - unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); + unsigned NewOpc = getMatchingPairOpcode(Opc); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; @@ -311,6 +358,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, Paired->getOperand(2).getImm() + OffsetStride) { RtMI = Paired; Rt2MI = I; + // Here we swapped the assumption made for SExtIdx. + // I.e., we turn ldp I, Paired into ldp Paired, I. + // Update the index accordingly. + if (SExtIdx != -1) + SExtIdx = (SExtIdx + 1) % 2; } else { RtMI = I; Rt2MI = Paired; @@ -337,8 +389,47 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, DEBUG(dbgs() << " "); DEBUG(Paired->print(dbgs())); DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); + + if (SExtIdx != -1) { + // Generate the sign extension for the proper result of the ldp. + // I.e., with X1, that would be: + // %W1<def> = KILL %W1, %X1<imp-def> + // %X1<def> = SBFMXri %X1<kill>, 0, 31 + MachineOperand &DstMO = MIB->getOperand(SExtIdx); + // Right now, DstMO has the extended register, since it comes from an + // extended opcode. + unsigned DstRegX = DstMO.getReg(); + // Get the W variant of that register. + unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); + // Update the result of LDP to use the W instead of the X variant. + DstMO.setReg(DstRegW); + DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << "\n"); + // Make the machine verifier happy by providing a definition for + // the X register. + // Insert this definition right after the generated LDP, i.e., before + // InsertionPoint. + MachineInstrBuilder MIBKill = + BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(TargetOpcode::KILL), DstRegW) + .addReg(DstRegW) + .addReg(DstRegX, RegState::Define); + MIBKill->getOperand(2).setImplicit(); + // Create the sign extension. + MachineInstrBuilder MIBSXTW = + BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(AArch64::SBFMXri), DstRegX) + .addReg(DstRegX) + .addImm(0) + .addImm(31); + (void)MIBSXTW; + DEBUG(dbgs() << " Extend operand:\n "); + DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); + DEBUG(dbgs() << "\n"); + } else { + DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << "\n"); + } // Erase the old instructions. I->eraseFromParent(); @@ -396,7 +487,8 @@ static int alignTo(int Num, int PowOf2) { /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, unsigned Limit) { + bool &MergeForward, int &SExtIdx, + unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; @@ -436,7 +528,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Now that we know this is a real instruction, count it. ++Count; - if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) { + bool CanMergeOpc = Opc == MI->getOpcode(); + SExtIdx = -1; + if (!CanMergeOpc) { + bool IsValidLdStrOpc; + unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); + if (!IsValidLdStrOpc) + continue; + // Opc will be the first instruction in the pair. + SExtIdx = NonSExtOpc == (unsigned)Opc ? 
1 : 0; + CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); + } + + if (CanMergeOpc && MI->getOperand(2).isImm()) { // If we've found another instruction with the same opcode, check to see // if the base and offset are compatible with our starting instruction. // These instructions all have scaled immediate operands, so we just @@ -823,13 +927,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { } // Look ahead up to ScanLimit instructions for a pairable instruction. bool MergeForward = false; + int SExtIdx = -1; MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, MergeForward, ScanLimit); + findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit); if (Paired != E) { // Merge the loads into a pair. Keeping the iterator straight is a // pain, so we let the merge routine tell us what the next instruction // is after it's done mucking about. - MBBI = mergePairedInsns(MBBI, Paired, MergeForward); + MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx); Modified = true; ++NumPairCreated; diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index e57b0f4..b829341 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -22,9 +22,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; +extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration; + AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer) : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {} @@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); Model = Printer.TM.getTLSModel(GV); + if (!EnableAArch64ELFLocalDynamicTLSGeneration && + Model == TLSModel::LocalDynamic) + Model = TLSModel::GeneralDynamic; + } else { assert(MO.isSymbol() && StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" && "unexpected external TLS symbol"); + // The general dynamic access sequence is used to get the + // address of _TLS_MODULE_BASE_. 
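The TLS change in this lowering makes local-dynamic generation opt-in: unless the EnableAArch64ELFLocalDynamicTLSGeneration flag is set, a local-dynamic access is simply emitted with the general-dynamic sequence. A hypothetical illustration of the trade, with invented variable and function names:

    // With the downgrade, each variable gets its own general-dynamic
    // descriptor resolution; true local-dynamic would resolve
    // _TLS_MODULE_BASE_ once and then use DTPREL offsets for both.
    static thread_local int counter_a;
    static thread_local int counter_b;
    int total() { return counter_a + counter_b; }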
Model = TLSModel::GeneralDynamic; } switch (Model) { @@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_G1; else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0) RefFlags |= AArch64MCExpr::VK_G0; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12) + RefFlags |= AArch64MCExpr::VK_HI12; if (MO.getTargetFlags() & AArch64II::MO_NC) RefFlags |= AArch64MCExpr::VK_NC; diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index 4690177..5394875 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -319,7 +319,7 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg, const MachineInstr &MI) { - LiveInterval LI = LIs.getInterval(reg); + const LiveInterval &LI = LIs.getInterval(reg); SlotIndex SI = LIs.getInstructionIndex(&MI); return LI.expiredAt(SI); } diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp index c037c86..e1b93bf 100644 --- a/lib/Target/AArch64/AArch64PromoteConstant.cpp +++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp @@ -38,6 +38,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -189,9 +190,11 @@ private: IPI->second.push_back(&Use); // Transfer the dominated uses of IPI to NewPt // Inserting into the DenseMap may invalidate existing iterator. - // Keep a copy of the key to find the iterator to erase. + // Keep a copy of the key to find the iterator to erase. Keep a copy of the + // value so that we don't have to dereference IPI->second. Instruction *OldInstr = IPI->first; - InsertPts[NewPt] = std::move(IPI->second); + Uses OldUses = std::move(IPI->second); + InsertPts[NewPt] = std::move(OldUses); // Erase IPI. 
InsertPts.erase(OldInstr); } diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 206cdbb..33c11fe 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -18,6 +18,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -37,9 +38,8 @@ static cl::opt<bool> ReserveX18("aarch64-reserve-x18", cl::Hidden, cl::desc("Reserve X18, making it unavailable as GPR")); -AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii, - const AArch64Subtarget *sti) - : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {} +AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT) + : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {} const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -55,7 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } const uint32_t * -AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_AArch64_NoRegs_RegMask; @@ -66,15 +67,16 @@ AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { } const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { - if (STI->isTargetDarwin()) + if (TT.isOSDarwin()) return CSR_AArch64_TLS_Darwin_RegMask; - assert(STI->isTargetELF() && "only expect Darwin or ELF TLS"); + assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS"); return CSR_AArch64_TLS_ELF_RegMask; } const uint32_t * -AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { +AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i64 argument (which must also be the register used to return a @@ -97,12 +99,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AArch64::WSP); Reserved.set(AArch64::WZR); - if (TFI->hasFP(MF) || STI->isTargetDarwin()) { + if (TFI->hasFP(MF) || TT.isOSDarwin()) { Reserved.set(AArch64::FP); Reserved.set(AArch64::W29); } - if (STI->isTargetDarwin() || ReserveX18) { + if (TT.isOSDarwin() || ReserveX18) { Reserved.set(AArch64::X18); // Platform register Reserved.set(AArch64::W18); } @@ -129,10 +131,10 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, return true; case AArch64::X18: case AArch64::W18: - return STI->isTargetDarwin() || ReserveX18; + return TT.isOSDarwin() || ReserveX18; case AArch64::FP: case AArch64::W29: - return TFI->hasFP(MF) || STI->isTargetDarwin(); + return TFI->hasFP(MF) || TT.isOSDarwin(); case AArch64::W19: case AArch64::X19: return hasBasePointer(MF); @@ -269,7 +271,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, // The FP is only available if there is no dynamic realignment. We // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. 
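A theme running through this AArch64RegisterInfo diff: the class now keeps only the Triple and rederives the instruction info from each MachineFunction on demand, so a single register-info instance no longer pins one subtarget. A minimal sketch of the pattern, with an invented helper name:

    #include "llvm/ADT/Triple.h"

    // Platform-dependent answers come from the Triple alone; per-function
    // state is passed in instead of being cached at construction time.
    static bool isX18Reserved(const llvm::Triple &TT, bool ReserveX18Flag) {
      return TT.isOSDarwin() || ReserveX18Flag; // Darwin's platform register
    }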
- if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset)) + if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset)) return false; // If we can reference via the stack pointer or base pointer, try that. @@ -277,7 +279,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, // to only disallow SP relative references in the live range of // the VLA(s). In practice, it's unclear how much difference that // would make, but it may be worth doing. - if (isFrameOffsetLegal(MI, Offset)) + if (isFrameOffsetLegal(MI, AArch64::SP, Offset)) return false; // The offset likely isn't legal; we want to allocate a virtual base register. @@ -285,6 +287,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, } bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + unsigned BaseReg, int64_t Offset) const { assert(Offset <= INT_MAX && "Offset too big to fit in int."); assert(MI && "Unable to get the legal offset for nil instruction."); @@ -302,10 +305,11 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, DebugLoc DL; // Defaults to "unknown" if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - + const MachineFunction &MF = *MBB->getParent(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); const MCInstrDesc &MCID = TII->get(AArch64::ADDXri); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const MachineFunction &MF = *MBB->getParent(); MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF)); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); @@ -324,6 +328,9 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } + const MachineFunction *MF = MI.getParent()->getParent(); + const AArch64InstrInfo *TII = + MF->getSubtarget<AArch64Subtarget>().getInstrInfo(); bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII); assert(Done && "Unable to resolve frame index!"); (void)Done; @@ -337,6 +344,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>( MF.getSubtarget().getFrameLowering()); @@ -389,10 +398,10 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case AArch64::GPR64RegClassID: case AArch64::GPR32commonRegClassID: case AArch64::GPR64commonRegClassID: - return 32 - 1 // XZR/SP - - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP - - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register - - hasBasePointer(MF); // X19 + return 32 - 1 // XZR/SP + - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP + - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register + - hasBasePointer(MF); // X19 case AArch64::FPR8RegClassID: case AArch64::FPR16RegClassID: case AArch64::FPR32RegClassID: diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index 51a5034..c01bfa5 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -19,26 +19,24 @@ namespace llvm { -class AArch64InstrInfo; -class AArch64Subtarget; class MachineFunction; class RegScavenger; class TargetRegisterClass; +class Triple; struct AArch64RegisterInfo : public 
AArch64GenRegisterInfo { private: - const AArch64InstrInfo *TII; - const AArch64Subtarget *STI; + const Triple &TT; public: - AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti); + AArch64RegisterInfo(const Triple &TT); bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; /// Code Generation virtual methods... - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; unsigned getCSRFirstUseCost() const override { // The cost will be compared against BlockFrequency where entry has the @@ -59,7 +57,8 @@ public: /// /// Should return NULL in the case that the calling convention does not have /// this property - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const override; const TargetRegisterClass * @@ -73,7 +72,7 @@ public: bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, + bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override; void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index c613025..221d70d 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -48,7 +48,7 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index d418cc5..bcab97d 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -41,6 +41,7 @@ protected: bool HasNEON; bool HasCrypto; bool HasCRC; + bool HasV8_1a; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 
bool HasZeroCycleRegMove; @@ -86,6 +87,7 @@ public: const AArch64RegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } + const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostMachineScheduler() const override { return isCortexA53() || isCortexA57(); @@ -99,6 +101,7 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasV8_1a() const { return HasV8_1a; } bool isLittleEndian() const { return IsLittle; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index d73d0b3..f902f64 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -104,6 +104,16 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { return make_unique<AArch64_ELFTargetObjectFile>(); } +// Helper function to build a DataLayout string +static std::string computeDataLayout(StringRef TT, bool LittleEndian) { + Triple Triple(TT); + if (Triple.isOSBinFormatMachO()) + return "e-m:o-i64:64-i128:128-n32:64-S128"; + if (LittleEndian) + return "e-m:e-i64:64-i128:128-n32:64-S128"; + return "E-m:e-i64:64-i128:128-n32:64-S128"; +} + /// TargetMachine ctor - Create an AArch64 architecture model. /// AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, @@ -112,16 +122,12 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool LittleEndian) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - // This nested ternary is horrible, but DL needs to be properly - // initialized - // before TLInfo is constructed. - DL(Triple(TT).isOSBinFormatMachO() - ? "e-m:o-i64:64-i128:128-n32:64-S128" - : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128" - : "E-m:e-i64:64-i128:128-n32:64-S128")), + // This nested ternary is horrible, but DL needs to be properly + // initialized before TLInfo is constructed. + : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS, + Options, RM, CM, OL), TLOF(createTLOF(Triple(getTargetTriple()))), - Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) { + isLittle(LittleEndian) { initAsmInfo(); } @@ -239,7 +245,7 @@ bool AArch64PassConfig::addPreISel() { // FIXME: On AArch64, this depends on the type. // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM->getOptLevel() == CodeGenOpt::Aggressive) addPass(createGlobalMergePass(TM, 4095)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); @@ -287,10 +293,7 @@ void AArch64PassConfig::addPostRegAlloc() { // Change dead register definitions to refer to the zero register. if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination) addPass(createAArch64DeadRegisterDefinitions()); - if (TM->getOptLevel() != CodeGenOpt::None && - (TM->getSubtarget<AArch64Subtarget>().isCortexA53() || - TM->getSubtarget<AArch64Subtarget>().isCortexA57()) && - usingDefaultRegAlloc()) + if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc()) // Improve performance for some FP/SIMD code for A57. 
addPass(createAArch64A57FPLoadBalancing()); } diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h index 7143adf..ec34fad 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -23,9 +23,7 @@ namespace llvm { class AArch64TargetMachine : public LLVMTargetMachine { protected: - const DataLayout DL; std::unique_ptr<TargetLoweringObjectFile> TLOF; - AArch64Subtarget Subtarget; mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap; public: @@ -35,11 +33,6 @@ public: CodeGenOpt::Level OL, bool IsLittleEndian); ~AArch64TargetMachine() override; - - const DataLayout *getDataLayout() const override { return &DL; } - const AArch64Subtarget *getSubtargetImpl() const override { - return &Subtarget; - } const AArch64Subtarget *getSubtargetImpl(const Function &F) const override; // Pass Pipeline Configuration diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 4069038..8ff58e9 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Dwarf.h" using namespace llvm; using namespace dwarf; @@ -23,6 +24,11 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx, InitializeELF(TM.Options.UseInitArray); } +AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile() + : TargetLoweringObjectFileMachO() { + SupportGOTPCRelWithOffset = false; +} + const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, @@ -50,3 +56,18 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol( MachineModuleInfo *MMI) const { return TM.getSymbol(GV, Mang); } + +const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel( + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { + assert((Offset+MV.getConstant() == 0) && + "Arch64 does not support GOT PC rel with extra offset"); + // On ARM64 Darwin, we can reference symbols with foo@GOT-., which + // is an indirect pc-relative reference. + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); + MCSymbol *PCSym = getContext().CreateTempSymbol(); + Streamer.EmitLabel(PCSym); + const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); + return MCBinaryExpr::CreateSub(Res, PC, getContext()); +} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 2e595f9..d41f445 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -24,6 +24,8 @@ class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { /// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. 
class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: + AArch64_MachoTargetObjectFile(); + const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, @@ -33,6 +35,11 @@ public: MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const override; + + const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 0646d85..0533355 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -10,6 +10,7 @@ #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" @@ -352,7 +353,7 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // We don't lower vector selects well that are wider than the register width. if (ValTy->isVectorTy() && ISD == ISD::SELECT) { // We would need this many instructions to hide the scalarization happening. - unsigned AmortizationCost = 20; + const unsigned AmortizationCost = 20; static const TypeConversionCostTblEntry<MVT::SimpleValueType> VectorSelectTbl[] = { { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost }, @@ -426,6 +427,15 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor() { void AArch64TTIImpl::getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) { + // Enable partial unrolling and runtime unrolling. + BaseT::getUnrollingPreferences(L, UP); + + // For inner loop, it is more likely to be a hot one, and the runtime check + // can be promoted out from LICM pass, so the overhead is less, let's try + // a larger threshold to unroll more loops. + if (L->getLoopDepth() > 1) + UP.PartialThreshold *= 2; + // Disable partial & runtime unrolling on -Os. UP.PartialOptSizeThreshold = 0; } diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 1960c99..1219ffc 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -113,11 +113,10 @@ public: #define GET_OPERAND_DIAGNOSTIC_TYPES #include "AArch64GenAsmMatcher.inc" }; - AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI) { - MCAsmParserExtension::Initialize(_Parser); + AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(STI) { + MCAsmParserExtension::Initialize(Parser); MCStreamer &S = getParser().getStreamer(); if (S.getTargetStreamer() == nullptr) new AArch64TargetStreamer(S); @@ -205,6 +204,8 @@ private: struct BarrierOp { unsigned Val; // Not the enum since not all values have names. 
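Back in the getUnrollingPreferences hunk above: partial and runtime unrolling now start from the base implementation's defaults, and loops at depth greater than one get a doubled PartialThreshold. A hypothetical nest showing which threshold applies where:

    // Inner loops are assumed hotter, and LICM is expected to hoist the
    // runtime trip-count checks, so depth > 1 gets twice the budget.
    void scale(float *a, int n, int m) {
      for (int i = 0; i < n; ++i)    // depth 1: default PartialThreshold
        for (int j = 0; j < m; ++j)  // depth 2: PartialThreshold * 2
          a[i * m + j] *= 2.0f;
    }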
+ const char *Data; + unsigned Length; }; struct SysRegOp { @@ -221,6 +222,8 @@ private: struct PrefetchOp { unsigned Val; + const char *Data; + unsigned Length; }; struct ShiftExtendOp { @@ -254,8 +257,7 @@ private: MCContext &Ctx; public: - AArch64Operand(KindTy K, MCContext &_Ctx) - : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {} + AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {} AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { Kind = o.Kind; @@ -349,6 +351,11 @@ public: return Barrier.Val; } + StringRef getBarrierName() const { + assert(Kind == k_Barrier && "Invalid access!"); + return StringRef(Barrier.Data, Barrier.Length); + } + unsigned getReg() const override { assert(Kind == k_Register && "Invalid access!"); return Reg.RegNum; @@ -384,6 +391,11 @@ public: return Prefetch.Val; } + StringRef getPrefetchName() const { + assert(Kind == k_Prefetch && "Invalid access!"); + return StringRef(Prefetch.Data, Prefetch.Length); + } + AArch64_AM::ShiftExtendType getShiftExtendType() const { assert(Kind == k_ShiftExtend && "Invalid access!"); return ShiftExtend.Type; @@ -752,58 +764,47 @@ public: } bool isMovZSymbolG3() const { - static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; - return isMovWSymbol(Variants); + return isMovWSymbol(AArch64MCExpr::VK_ABS_G3); } bool isMovZSymbolG2() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, - AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2}; - return isMovWSymbol(Variants); + return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, + AArch64MCExpr::VK_TPREL_G2, + AArch64MCExpr::VK_DTPREL_G2}); } bool isMovZSymbolG1() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, + return isMovWSymbol({ + AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1, AArch64MCExpr::VK_DTPREL_G1, - }; - return isMovWSymbol(Variants); + }); } bool isMovZSymbolG0() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, - AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0}; - return isMovWSymbol(Variants); + return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, + AArch64MCExpr::VK_TPREL_G0, + AArch64MCExpr::VK_DTPREL_G0}); } bool isMovKSymbolG3() const { - static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; - return isMovWSymbol(Variants); + return isMovWSymbol(AArch64MCExpr::VK_ABS_G3); } bool isMovKSymbolG2() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G2_NC}; - return isMovWSymbol(Variants); + return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC); } bool isMovKSymbolG1() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC, - AArch64MCExpr::VK_DTPREL_G1_NC - }; - return isMovWSymbol(Variants); + return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC, + AArch64MCExpr::VK_TPREL_G1_NC, + AArch64MCExpr::VK_DTPREL_G1_NC}); } bool isMovKSymbolG0() const { - static AArch64MCExpr::VariantKind Variants[] = { - AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, - AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC - }; - return isMovWSymbol(Variants); + return isMovWSymbol( + {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, + AArch64MCExpr::VK_TPREL_G0_NC, 
AArch64MCExpr::VK_DTPREL_G0_NC}); } template<int RegWidth, int Shift> @@ -1608,10 +1609,14 @@ public: return Op; } - static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S, + static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, + StringRef Str, + SMLoc S, MCContext &Ctx) { auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx); Op->Barrier.Val = Val; + Op->Barrier.Data = Str.data(); + Op->Barrier.Length = Str.size(); Op->StartLoc = S; Op->EndLoc = S; return Op; @@ -1642,10 +1647,14 @@ public: return Op; } - static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S, + static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, + StringRef Str, + SMLoc S, MCContext &Ctx) { auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx); Op->Prefetch.Val = Val; + Op->Barrier.Data = Str.data(); + Op->Barrier.Length = Str.size(); Op->StartLoc = S; Op->EndLoc = S; return Op; @@ -1673,9 +1682,8 @@ void AArch64Operand::print(raw_ostream &OS) const { << AArch64_AM::getFPImmFloat(getFPImm()) << ") >"; break; case k_Barrier: { - bool Valid; - StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid); - if (Valid) + StringRef Name = getBarrierName(); + if (!Name.empty()) OS << "<barrier " << Name << ">"; else OS << "<barrier invalid #" << getBarrier() << ">"; @@ -1718,9 +1726,8 @@ void AArch64Operand::print(raw_ostream &OS) const { OS << "c" << getSysCR(); break; case k_Prefetch: { - bool Valid; - StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid); - if (Valid) + StringRef Name = getPrefetchName(); + if (!Name.empty()) OS << "<prfop " << Name << ">"; else OS << "<prfop invalid #" << getPrefetch() << ">"; @@ -1963,7 +1970,11 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + bool Valid; + auto Mapper = AArch64PRFM::PRFMMapper(); + StringRef Name = Mapper.toString(MCE->getValue(), Valid); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name, + S, getContext())); return MatchOperand_Success; } @@ -1973,14 +1984,16 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { } bool Valid; - unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid); + auto Mapper = AArch64PRFM::PRFMMapper(); + unsigned prfop = Mapper.fromString(Tok.getString(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; } Parser.Lex(); // Eat identifier token. 
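These operand changes store the spelled name next to the immediate so printing can round-trip exactly what was written, instead of reverse-mapping the value through the tables. One wrinkle worth flagging: CreatePrefetch above writes through Op->Barrier.Data and Op->Barrier.Length. BarrierOp and PrefetchOp sit in the operand's storage union and share a common initial sequence, so the write lands on the right bytes in practice, but Prefetch.Data and Prefetch.Length would say what is meant. A reduced model of why it happens to work, with the types simplified:

    // The two structs alias field-for-field inside the union, so writing
    // one's Data/Length is (fragile) shorthand for the other's.
    struct BarrierOp  { unsigned Val; const char *Data; unsigned Length; };
    struct PrefetchOp { unsigned Val; const char *Data; unsigned Length; };
    union OperandStorage { BarrierOp Barrier; PrefetchOp Prefetch; };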
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Tok.getString(), + S, getContext())); return MatchOperand_Success; } @@ -2582,8 +2595,11 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { Error(ExprLoc, "barrier operand out of range"); return MatchOperand_ParseFail; } - Operands.push_back( - AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); + bool Valid; + auto Mapper = AArch64DB::DBarrierMapper(); + StringRef Name = Mapper.toString(MCE->getValue(), Valid); + Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name, + ExprLoc, getContext())); return MatchOperand_Success; } @@ -2593,7 +2609,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { } bool Valid; - unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid); + auto Mapper = AArch64DB::DBarrierMapper(); + unsigned Opt = Mapper.fromString(Tok.getString(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; @@ -2605,8 +2622,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back( - AArch64Operand::CreateBarrier(Opt, getLoc(), getContext())); + Operands.push_back( AArch64Operand::CreateBarrier(Opt, Tok.getString(), + getLoc(), getContext())); Parser.Lex(); // Consume the option return MatchOperand_Success; @@ -2631,8 +2648,8 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { assert(IsKnown == (MSRReg != -1U) && "register should be -1 if and only if it's unknown"); - uint32_t PStateField = - AArch64PState::PStateMapper().fromString(Tok.getString(), IsKnown); + auto PStateMapper = AArch64PState::PStateMapper(); + uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown); assert(IsKnown == (PStateField != -1U) && "register should be -1 if and only if it's unknown"); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 423da65..84b63a0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachO.h" using namespace llvm; @@ -493,14 +494,28 @@ void ELFAArch64AsmBackend::processFixupValue( IsResolved = false; } +// Returns whether this fixup is based on an address in the .eh_frame section, +// and therefore should be byte swapped. +// FIXME: Should be replaced with something more principled. 
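Context for the helper that follows: 4-byte fixups landing in .eh_frame must be stored big-endian on big-endian targets, presumably because the generic fixup application writes the bytes little-endian, and the section is now identified by evaluating the fixup expression as a relocatable value rather than via FindAssociatedSection. Note that the new code dereferences the dyn_cast<MCSectionELF> result without a null check, which is safe only while this path is reached exclusively for ELF sections. A defensive variant, as a sketch rather than the patch:

    #include "llvm/MC/MCExpr.h"
    #include "llvm/MC/MCSectionELF.h"
    #include "llvm/MC/MCValue.h"
    #include "llvm/Support/Casting.h"

    // Same logic, but bail out instead of dereferencing a null dyn_cast
    // result when the owning section is not an ELF section.
    static bool isByteSwappedFixup(const llvm::MCExpr *E) {
      llvm::MCValue Val;
      if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr))
        return false;
      if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined())
        return false;
      const auto *SecELF = llvm::dyn_cast<llvm::MCSectionELF>(
          &Val.getSymA()->getSymbol().getSection());
      return SecELF && SecELF->getSectionName() == ".eh_frame";
    }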
+static bool isByteSwappedFixup(const MCExpr *E) { + MCValue Val; + if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr)) + return false; + + if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined()) + return false; + + const MCSectionELF *SecELF = + dyn_cast<MCSectionELF>(&Val.getSymA()->getSymbol().getSection()); + return SecELF->getSectionName() == ".eh_frame"; +} + void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { // store fixups in .eh_frame section in big endian order if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) { - const MCSection *Sec = Fixup.getValue()->FindAssociatedSection(); - const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec); - if (SecELF && SecELF->getSectionName() == ".eh_frame") + if (isByteSwappedFixup(Fixup.getValue())) Value = ByteSwap_32(unsigned(Value)); } AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 8dc6c30..8f780d2 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -203,24 +203,27 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) { } namespace llvm { -MCStreamer * -createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = llvm::createAsmStreamer( - Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); - new AArch64TargetAsmStreamer(*S, OS); - return S; +MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new AArch64TargetAsmStreamer(S, OS); } MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); - new AArch64TargetELFStreamer(*S); if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; } + +MCTargetStreamer * +createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + Triple TT(STI.getTargetTriple()); + if (TT.getObjectFormat() == Triple::ELF) + return new AArch64TargetELFStreamer(S); + return nullptr; +} } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 4756a19..9ea49f0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -38,9 +38,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter { AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT public: - AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : Ctx(ctx) {} + AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {} ~AArch64MCCodeEmitter() {} @@ -205,9 +203,8 @@ public: MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { - return new AArch64MCCodeEmitter(MCII, STI, Ctx); + return new AArch64MCCodeEmitter(MCII, Ctx); } /// getMachineOpValue - Return binary encoding of operand. 
If the machine diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index e396df8..9e31508 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/ELF.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 0f7a6b8..38b399d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -123,94 +123,61 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, return nullptr; } -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, - /*LabelSections*/ true); - +static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll); } +static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, + bool DWARFMustBeAtTheEnd) { + return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, + DWARFMustBeAtTheEnd, + /*LabelSections*/ true); +} + // Force static initialization. extern "C" void LLVMInitializeAArch64TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo); - RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo); - RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, - createAArch64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, - createAArch64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target, - createAArch64MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, - createAArch64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, - createAArch64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheARM64Target, - createAArch64MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, - createAArch64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, - createAArch64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheARM64Target, - createAArch64MCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, - createAArch64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, - createAArch64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target, - createAArch64MCSubtargetInfo); + for (Target *T : + {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target}) { + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo); + + // Register the MC codegen info. 
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAArch64MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createAArch64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createAArch64MCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createAArch64MCSubtargetInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(*T, createAArch64MCCodeEmitter); + + // Register the obj streamers. + TargetRegistry::RegisterELFStreamer(*T, createELFStreamer); + TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer); + + // Register the obj target streamer. + TargetRegistry::RegisterObjectTargetStreamer( + *T, createAArch64ObjectTargetStreamer); + + // Register the asm streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, + createAArch64AsmTargetStreamer); + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createAArch64MCInstPrinter); + } // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, - createAArch64leAsmBackend); + for (Target *T : {&TheAArch64leTarget, &TheARM64Target}) + TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend); TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, createAArch64beAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheARM64Target, - createAArch64leAsmBackend); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, - createAArch64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, - createAArch64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheARM64Target, - createAArch64MCCodeEmitter); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, - createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, - createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer); - - // Register the asm streamer. - TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget, - createAArch64MCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget, - createAArch64MCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheARM64Target, - createAArch64MCAsmStreamer); - - // Register the MCInstPrinter. 
- TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, - createAArch64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, - createAArch64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheARM64Target, - createAArch64MCInstPrinter); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 1553115..7ce303b 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -28,8 +28,10 @@ class MCRegisterInfo; class MCObjectWriter; class MCStreamer; class MCSubtargetInfo; +class MCTargetStreamer; class StringRef; class Target; +class Triple; class raw_ostream; extern Target TheAArch64leTarget; @@ -37,9 +39,8 @@ extern Target TheAArch64beTarget; extern Target TheARM64Target; MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); + const MCRegisterInfo &MRI, + MCContext &Ctx); MCAsmBackend *createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); @@ -53,11 +54,14 @@ MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); -MCStreamer * -createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst); +MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm); + +MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S, + const MCSubtargetInfo &STI); + } // End llvm namespace // Defines symbolic names for AArch64 registers. 
This defines a mapping from diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index bc6c7a9..160c1c5 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -19,10 +19,10 @@ using namespace llvm; StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Value == Value) { + for (unsigned i = 0; i < NumMappings; ++i) { + if (Mappings[i].Value == Value) { Valid = true; - return Pairs[i].Name; + return Mappings[i].Name; } } @@ -32,10 +32,10 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { std::string LowerCaseName = Name.lower(); - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Name == LowerCaseName) { + for (unsigned i = 0; i < NumMappings; ++i) { + if (Mappings[i].Name == LowerCaseName) { Valid = true; - return Pairs[i].Value; + return Mappings[i].Value; } } @@ -47,7 +47,7 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const { return Value < TooBigImm; } -const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = { {"s1e1r", S1E1R}, {"s1e2r", S1E2R}, {"s1e3r", S1E3R}, @@ -63,9 +63,9 @@ const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = { }; AArch64AT::ATMapper::ATMapper() - : AArch64NamedImmMapper(ATPairs, 0) {} + : AArch64NamedImmMapper(ATMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = { {"oshld", OSHLD}, {"oshst", OSHST}, {"osh", OSH}, @@ -81,9 +81,9 @@ const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] }; AArch64DB::DBarrierMapper::DBarrierMapper() - : AArch64NamedImmMapper(DBarrierPairs, 16u) {} + : AArch64NamedImmMapper(DBarrierMappings, 16u) {} -const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = { {"zva", ZVA}, {"ivac", IVAC}, {"isw", ISW}, @@ -95,25 +95,25 @@ const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = { }; AArch64DC::DCMapper::DCMapper() - : AArch64NamedImmMapper(DCPairs, 0) {} + : AArch64NamedImmMapper(DCMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = { {"ialluis", IALLUIS}, {"iallu", IALLU}, {"ivau", IVAU} }; AArch64IC::ICMapper::ICMapper() - : AArch64NamedImmMapper(ICPairs, 0) {} + : AArch64NamedImmMapper(ICMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = { {"sy", SY}, }; AArch64ISB::ISBMapper::ISBMapper() - : AArch64NamedImmMapper(ISBPairs, 16) {} + : AArch64NamedImmMapper(ISBMappings, 16) {} -const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = { {"pldl1keep", PLDL1KEEP}, {"pldl1strm", PLDL1STRM}, {"pldl2keep", PLDL2KEEP}, @@ -135,18 +135,18 @@ const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = { }; AArch64PRFM::PRFMMapper::PRFMMapper() - : AArch64NamedImmMapper(PRFMPairs, 32) {} + : 
AArch64NamedImmMapper(PRFMMappings, 32) {} -const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = { {"spsel", SPSel}, {"daifset", DAIFSet}, {"daifclr", DAIFClr} }; AArch64PState::PStateMapper::PStateMapper() - : AArch64NamedImmMapper(PStatePairs, 0) {} + : AArch64NamedImmMapper(PStateMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { {"mdccsr_el0", MDCCSR_EL0}, {"dbgdtrrx_el0", DBGDTRRX_EL0}, {"mdrar_el1", MDRAR_EL1}, @@ -247,11 +247,11 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = { AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) : SysRegMapper(FeatureBits) { - InstPairs = &MRSPairs[0]; - NumInstPairs = llvm::array_lengthof(MRSPairs); + InstMappings = &MRSMappings[0]; + NumInstMappings = llvm::array_lengthof(MRSMappings); } -const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = { {"dbgdtrtx_el0", DBGDTRTX_EL0}, {"oslar_el1", OSLAR_EL1}, {"pmswinc_el0", PMSWINC_EL0}, @@ -271,12 +271,12 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = { AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) : SysRegMapper(FeatureBits) { - InstPairs = &MSRPairs[0]; - NumInstPairs = llvm::array_lengthof(MSRPairs); + InstMappings = &MSRMappings[0]; + NumInstMappings = llvm::array_lengthof(MSRMappings); } -const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = { {"osdtrrx_el1", OSDTRRX_EL1}, {"osdtrtx_el1", OSDTRTX_EL1}, {"teecr32_el1", TEECR32_EL1}, @@ -756,7 +756,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] }; const AArch64NamedImmMapper::Mapping -AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = { +AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = { {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3} }; @@ -765,29 +765,29 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { std::string NameLower = Name.lower(); // First search the registers shared by all - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Name == NameLower) { + for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { + if (SysRegMappings[i].Name == NameLower) { Valid = true; - return SysRegPairs[i].Value; + return SysRegMappings[i].Value; } } // Next search for target specific registers if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { - if (CycloneSysRegPairs[i].Name == NameLower) { + for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { + if (CycloneSysRegMappings[i].Name == NameLower) { Valid = true; - return CycloneSysRegPairs[i].Value; + return CycloneSysRegMappings[i].Value; } } } // Now try the instruction-specific registers (either read-only or // write-only). 
- for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Name == NameLower) { + for (unsigned i = 0; i < NumInstMappings; ++i) { + if (InstMappings[i].Name == NameLower) { Valid = true; - return InstPairs[i].Value; + return InstMappings[i].Value; } } @@ -816,26 +816,26 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { std::string AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { // First search the registers shared by all - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Value == Bits) { - return SysRegPairs[i].Name; + for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { + if (SysRegMappings[i].Value == Bits) { + return SysRegMappings[i].Name; } } // Next search for target specific registers if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { - if (CycloneSysRegPairs[i].Value == Bits) { - return CycloneSysRegPairs[i].Name; + for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { + if (CycloneSysRegMappings[i].Value == Bits) { + return CycloneSysRegMappings[i].Name; } } } // Now try the instruction-specific registers (either read-only or // write-only). - for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Value == Bits) { - return InstPairs[i].Name; + for (unsigned i = 0; i < NumInstMappings; ++i) { + if (InstMappings[i].Value == Bits) { + return InstMappings[i].Name; } } @@ -850,7 +850,7 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { + "_c" + utostr(CRm) + "_" + utostr(Op2); } -const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = { {"ipas2e1is", IPAS2E1IS}, {"ipas2le1is", IPAS2LE1IS}, {"vmalle1is", VMALLE1IS}, @@ -886,4 +886,4 @@ const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = { }; AArch64TLBI::TLBIMapper::TLBIMapper() - : AArch64NamedImmMapper(TLBIPairs, 0) {} + : AArch64NamedImmMapper(TLBIMappings, 0) {} diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index c60b09a..2ae6f52 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -283,8 +283,8 @@ struct AArch64NamedImmMapper { }; template<int N> - AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) - : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} + AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm) + : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {} StringRef toString(uint32_t Value, bool &Valid) const; uint32_t fromString(StringRef Name, bool &Valid) const; @@ -294,8 +294,8 @@ struct AArch64NamedImmMapper { /// N being 0 indicates no immediate syntax-form is allowed. 
bool validImm(uint32_t Value) const; protected: - const Mapping *Pairs; - size_t NumPairs; + const Mapping *Mappings; + size_t NumMappings; uint32_t TooBigImm; }; @@ -317,7 +317,7 @@ namespace AArch64AT { }; struct ATMapper : AArch64NamedImmMapper { - const static Mapping ATPairs[]; + const static Mapping ATMappings[]; ATMapper(); }; @@ -341,7 +341,7 @@ namespace AArch64DB { }; struct DBarrierMapper : AArch64NamedImmMapper { - const static Mapping DBarrierPairs[]; + const static Mapping DBarrierMappings[]; DBarrierMapper(); }; @@ -361,7 +361,7 @@ namespace AArch64DC { }; struct DCMapper : AArch64NamedImmMapper { - const static Mapping DCPairs[]; + const static Mapping DCMappings[]; DCMapper(); }; @@ -378,7 +378,7 @@ namespace AArch64IC { struct ICMapper : AArch64NamedImmMapper { - const static Mapping ICPairs[]; + const static Mapping ICMappings[]; ICMapper(); }; @@ -394,7 +394,7 @@ namespace AArch64ISB { SY = 0xf }; struct ISBMapper : AArch64NamedImmMapper { - const static Mapping ISBPairs[]; + const static Mapping ISBMappings[]; ISBMapper(); }; @@ -424,7 +424,7 @@ namespace AArch64PRFM { }; struct PRFMMapper : AArch64NamedImmMapper { - const static Mapping PRFMPairs[]; + const static Mapping PRFMMappings[]; PRFMMapper(); }; @@ -439,7 +439,7 @@ namespace AArch64PState { }; struct PStateMapper : AArch64NamedImmMapper { - const static Mapping PStatePairs[]; + const static Mapping PStateMappings[]; PStateMapper(); }; @@ -1134,11 +1134,11 @@ namespace AArch64SysReg { // burdening the common AArch64NamedImmMapper with abstractions only needed in // this one case. struct SysRegMapper { - static const AArch64NamedImmMapper::Mapping SysRegPairs[]; - static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[]; + static const AArch64NamedImmMapper::Mapping SysRegMappings[]; + static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[]; - const AArch64NamedImmMapper::Mapping *InstPairs; - size_t NumInstPairs; + const AArch64NamedImmMapper::Mapping *InstMappings; + size_t NumInstMappings; uint64_t FeatureBits; SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { } @@ -1147,12 +1147,12 @@ namespace AArch64SysReg { }; struct MSRMapper : SysRegMapper { - static const AArch64NamedImmMapper::Mapping MSRPairs[]; + static const AArch64NamedImmMapper::Mapping MSRMappings[]; MSRMapper(uint64_t FeatureBits); }; struct MRSMapper : SysRegMapper { - static const AArch64NamedImmMapper::Mapping MRSPairs[]; + static const AArch64NamedImmMapper::Mapping MRSMappings[]; MRSMapper(uint64_t FeatureBits); }; @@ -1197,7 +1197,7 @@ namespace AArch64TLBI { }; struct TLBIMapper : AArch64NamedImmMapper { - const static Mapping TLBIPairs[]; + const static Mapping TLBIMappings[]; TLBIMapper(); }; @@ -1229,7 +1229,7 @@ namespace AArch64II { MO_NO_FLAG, - MO_FRAGMENT = 0x7, + MO_FRAGMENT = 0xf, /// MO_PAGE - A symbol operand with this flag represents the pc-relative /// offset of the 4K page containing the symbol. This is used with the @@ -1257,26 +1257,31 @@ namespace AArch64II { /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction MO_G0 = 6, + /// MO_HI12 - This flag indicates that a symbol operand represents the bits + /// 12-23 of a 64-bit address, used in an arithmetic immediate-shifted-left- + /// by-12-bits instruction. + MO_HI12 = 7, + /// MO_GOT - This flag indicates that a symbol operand represents the /// address of the GOT entry for the symbol, rather than the address of /// the symbol itself.
- MO_GOT = 8, + MO_GOT = 0x10, /// MO_NC - Indicates whether the linker is expected to check the symbol /// reference for overflow. For example in an ADRP/ADD pair of relocations /// the ADRP usually does check, but not the ADD. - MO_NC = 0x10, + MO_NC = 0x20, /// MO_TLS - Indicates that the operand being accessed is some kind of /// thread-local symbol. On Darwin, only one type of thread-local access /// exists (pre linker-relaxation), but on ELF the TLSModel used for the /// referee will affect interpretation. - MO_TLS = 0x20, + MO_TLS = 0x40, /// MO_CONSTPOOL - This flag indicates that a symbol operand represents /// the address of a constant pool entry for the symbol, rather than the /// address of the symbol itself. - MO_CONSTPOOL = 0x40 + MO_CONSTPOOL = 0x80 }; } // end namespace AArch64II
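
The AArch64MCTargetDesc.cpp hunks above replace three near-identical copies of every Register* call with one loop over an initializer_list of target pointers, the same idiom the patch uses for the asm-backend registration. A minimal, self-contained sketch of that idiom; the Target struct and RegisterMCInstrInfo helper below are stand-ins for illustration, not the real LLVM API:

#include <initializer_list>
#include <iostream>
#include <string>

struct Target { std::string Name; };  // stand-in for llvm::Target

// Stand-in for TargetRegistry::RegisterMCInstrInfo and friends.
static void RegisterMCInstrInfo(Target &T) {
  std::cout << "registered MC instr info for " << T.Name << "\n";
}

Target TheAArch64leTarget{"aarch64"}, TheAArch64beTarget{"aarch64_be"},
    TheARM64Target{"arm64"};

int main() {
  // One loop body instead of three copies of every registration call;
  // supporting another target name becomes a one-token change.
  for (Target *T : {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target})
    RegisterMCInstrInfo(*T);
}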
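
The Pairs-to-Mappings rename in AArch64BaseInfo.{cpp,h} is mechanical, but the lookup contract is easy to model. A self-contained sketch of the table shape and the toString/fromString/validImm loops, reusing the ISB table's real entry ("sy" = 0xf, TooBigImm = 16) with simplified types:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>

struct Mapping { const char *Name; uint32_t Value; };

struct NamedImmMapper {
  const Mapping *Mappings;
  std::size_t NumMappings;
  uint32_t TooBigImm;  // 0 means no raw-immediate syntax form is allowed

  // Linear scans, as in AArch64NamedImmMapper::toString/fromString.
  const char *toString(uint32_t Value, bool &Valid) const {
    for (std::size_t i = 0; i < NumMappings; ++i)
      if (Mappings[i].Value == Value) { Valid = true; return Mappings[i].Name; }
    Valid = false;
    return "";
  }
  // The real mapper lower-cases Name before comparing; omitted here.
  uint32_t fromString(const char *Name, bool &Valid) const {
    for (std::size_t i = 0; i < NumMappings; ++i)
      if (std::strcmp(Mappings[i].Name, Name) == 0) { Valid = true; return Mappings[i].Value; }
    Valid = false;
    return ~0u;
  }
  bool validImm(uint32_t Value) const { return Value < TooBigImm; }
};

static const Mapping ISBMappings[] = {{"sy", 0xf}};  // mirrors ISBMapper

int main() {
  NamedImmMapper ISB{ISBMappings, 1, 16};
  bool Valid;
  std::cout << ISB.fromString("sy", Valid) << "\n";   // prints 15
  std::cout << ISB.toString(0xf, Valid) << "\n";      // prints sy
  std::cout << ISB.validImm(4) << "\n";               // raw "isb #4" is legal
}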
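
SysRegMapper::fromString in the AArch64BaseInfo.cpp hunk consults three tables in order: the shared SysRegMappings, the Cyclone-only table when the feature bit is set, and finally the MRS- or MSR-specific InstMappings. A sketch of that fallback chain; the one-entry tables and the feature-bit value are illustrative stand-ins, not the real encodings:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>

struct Mapping { const char *Name; uint32_t Value; };

static const Mapping SysRegMappings[]        = {{"osdtrrx_el1", 0x1}};
static const Mapping CycloneSysRegMappings[] = {{"cpm_ioacc_ctl_el3", 0x2}};
static const uint64_t ProcCyclone = 1;  // stand-in for AArch64::ProcCyclone

static const Mapping *search(const Mapping *M, std::size_t N, const char *Name) {
  for (std::size_t i = 0; i < N; ++i)
    if (std::strcmp(M[i].Name, Name) == 0) return &M[i];
  return nullptr;
}

uint32_t sysRegFromString(const char *Name, const Mapping *InstMappings,
                          std::size_t NumInstMappings, uint64_t FeatureBits,
                          bool &Valid) {
  // 1. Registers shared by all AArch64 CPUs.
  const Mapping *Hit = search(SysRegMappings, std::size(SysRegMappings), Name);
  // 2. Target-specific registers, gated on the subtarget feature bits.
  if (!Hit && (FeatureBits & ProcCyclone))
    Hit = search(CycloneSysRegMappings, std::size(CycloneSysRegMappings), Name);
  // 3. Read-only (MRS) or write-only (MSR) instruction-specific registers.
  if (!Hit)
    Hit = search(InstMappings, NumInstMappings, Name);
  Valid = Hit != nullptr;
  return Hit ? Hit->Value : ~0u;
}

int main() {
  static const Mapping MRSMappings[] = {{"mdccsr_el0", 0x3}};
  bool Valid;
  uint32_t V = sysRegFromString("cpm_ioacc_ctl_el3", MRSMappings, 1,
                                ProcCyclone, Valid);
  return (Valid && V == 0x2) ? 0 : 1;  // found via the Cyclone table
}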
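
The AArch64II renumbering at the end of AArch64BaseInfo.h follows from MO_HI12 = 7: it fills the last value of the old 3-bit fragment field, so MO_FRAGMENT widens from 0x7 to 0xf for headroom, and each standalone flag above it (MO_GOT, MO_NC, MO_TLS, MO_CONSTPOOL) shifts up one bit to stay clear of the widened mask. A self-contained check of the new layout:

#include <cassert>

namespace AArch64II {
enum TOF : unsigned {
  MO_NO_FLAG = 0,
  MO_FRAGMENT = 0xf,  // was 0x7; widened when MO_HI12 filled the old field
  MO_PAGE = 1, MO_PAGEOFF = 2, MO_G3 = 3, MO_G2 = 4, MO_G1 = 5, MO_G0 = 6,
  MO_HI12 = 7,        // new: address bits 12-23, for "add ..., #imm, lsl #12"
  MO_GOT = 0x10,      // was 8, which now lies inside the fragment mask
  MO_NC = 0x20,
  MO_TLS = 0x40,
  MO_CONSTPOOL = 0x80
};
} // end namespace AArch64II

int main() {
  using namespace AArch64II;
  // A GOT page reference carries a fragment and a standalone flag at once;
  // under the widened mask the two sets of bits no longer overlap.
  unsigned Flags = MO_GOT | MO_PAGE;
  assert((Flags & MO_FRAGMENT) == MO_PAGE);
  assert((MO_GOT & MO_FRAGMENT) == 0);  // would fail with the old MO_GOT = 8
  assert((MO_HI12 & MO_FRAGMENT) == MO_HI12);
  return 0;
}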