Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r--  lib/Target/AArch64/AArch64.td                              |   7
-rw-r--r--  lib/Target/AArch64/AArch64A53Fix835769.cpp                 |   1
-rw-r--r--  lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp           |  26
-rw-r--r--  lib/Target/AArch64/AArch64AddressTypePromotion.cpp         |   1
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp                   |  77
-rw-r--r--  lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp   |   6
-rw-r--r--  lib/Target/AArch64/AArch64CollectLOH.cpp                   |  14
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp                     |   4
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp                 |  47
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp                 | 302
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h                   |  35
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp                    |  10
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h                      |   7
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td                     | 297
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp           | 127
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.cpp                  |  11
-rw-r--r--  lib/Target/AArch64/AArch64PBQPRegAlloc.cpp                 |   2
-rw-r--r--  lib/Target/AArch64/AArch64PromoteConstant.cpp              |   7
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp                 |  47
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.h                   |  19
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp                    |   2
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h                      |   3
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp                |  31
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h                  |   7
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.cpp             |  21
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.h               |   7
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp          |  12
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp          | 125
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp      |  21
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp     |  23
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp   |   7
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp          |   1
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp    | 129
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h      |  20
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.cpp               |  96
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.h                 |  51
36 files changed, 956 insertions(+), 647 deletions(-)
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index dff48f9..bb3db4b 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -32,6 +32,9 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
+def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
+ "Enable ARMv8.1a extensions", [FeatureCRC]>;
+
/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -89,6 +92,10 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC]>;
+def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a,
+ FeatureNEON,
+ FeatureCrypto]>;
+
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index dd401c6..3bc5a54 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 2cf3c22..bffd9e6 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -142,7 +142,7 @@ private:
int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
void scanInstruction(MachineInstr *MI, unsigned Idx,
std::map<unsigned, Chain*> &Active,
- std::set<std::unique_ptr<Chain>> &AllChains);
+ std::vector<std::unique_ptr<Chain>> &AllChains);
void maybeKillChain(MachineOperand &MO, unsigned Idx,
std::map<unsigned, Chain*> &RegChains);
Color getColor(unsigned Register);
@@ -287,12 +287,12 @@ public:
raw_string_ostream OS(S);
OS << "{";
- StartInst->print(OS, NULL, true);
+ StartInst->print(OS, /* SkipOpers= */true);
OS << " -> ";
- LastInst->print(OS, NULL, true);
+ LastInst->print(OS, /* SkipOpers= */true);
if (KillInst) {
OS << " (kill @ ";
- KillInst->print(OS, NULL, true);
+ KillInst->print(OS, /* SkipOpers= */true);
OS << ")";
}
OS << "}";
@@ -307,6 +307,11 @@ public:
//===----------------------------------------------------------------------===//
bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
+ // Don't do anything if this isn't an A53 or A57.
+ if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
+ F.getSubtarget<AArch64Subtarget>().isCortexA57()))
+ return false;
+
bool Changed = false;
DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
@@ -331,7 +336,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
// been killed yet. This is keyed by register - all chains can only have one
// "link" register between each inst in the chain.
std::map<unsigned, Chain*> ActiveChains;
- std::set<std::unique_ptr<Chain>> AllChains;
+ std::vector<std::unique_ptr<Chain>> AllChains;
unsigned Idx = 0;
for (auto &MI : MBB)
scanInstruction(&MI, Idx++, ActiveChains, AllChains);
@@ -598,10 +603,9 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
return Changed;
}
-void AArch64A57FPLoadBalancing::
-scanInstruction(MachineInstr *MI, unsigned Idx,
- std::map<unsigned, Chain*> &ActiveChains,
- std::set<std::unique_ptr<Chain>> &AllChains) {
+void AArch64A57FPLoadBalancing::scanInstruction(
+ MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
+ std::vector<std::unique_ptr<Chain>> &AllChains) {
// Inspect "MI", updating ActiveChains and AllChains.
if (isMul(MI)) {
@@ -620,7 +624,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else if (isMla(MI)) {
@@ -664,7 +668,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
<< TRI->getName(DestReg) << "\n");
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else {
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 287989f..716e1a3 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -41,6 +41,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index d64d851..1b4483a 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,12 +12,14 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -34,8 +36,10 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -49,7 +53,7 @@ class AArch64AsmPrinter : public AsmPrinter {
public:
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
- SM(*this), AArch64FI(nullptr), LOHLabelCounter(0) {}
+ SM(*this), AArch64FI(nullptr) {}
const char *getPassName() const override {
return "AArch64 Assembly Printer";
@@ -110,7 +114,6 @@ private:
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
MInstToMCSymbol LOHInstToLabel;
- unsigned LOHLabelCounter;
};
} // end of anonymous namespace
@@ -219,6 +222,17 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
O << '#' << Imm;
break;
}
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *Sym = getSymbol(GV);
+
+ // FIXME: Can we get anything other than a plain symbol here?
+ assert(!MO.getTargetFlags() && "Unknown operand target flag!");
+
+ O << *Sym;
+ printOffset(MO.getOffset(), O);
+ break;
+ }
}
}
@@ -450,7 +464,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (AArch64FI->getLOHRelated().count(MI)) {
// Generate a label for LOH related instruction
- MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
+ MCSymbol *LOHLabel = createTempSymbol("loh");
// Associate the instruction with the label
LOHInstToLabel[MI] = LOHLabel;
OutStreamer.EmitLabel(LOHLabel);
@@ -489,24 +503,57 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(OutStreamer, TmpInst);
return;
}
- case AArch64::TLSDESC_BLR: {
- MCOperand Callee, Sym;
- MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
- MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
-
- // First emit a relocation-annotation. This expands to no code, but requests
+ case AArch64::TLSDESC_CALLSEQ: {
+ /// lower this to:
+ /// adrp x0, :tlsdesc:var
+ /// ldr x1, [x0, #:tlsdesc_lo12:var]
+ /// add x0, x0, #:tlsdesc_lo12:var
+ /// .tlsdesccall var
+ /// blr x1
+ /// (TPIDR_EL0 offset now in x0)
+ const MachineOperand &MO_Sym = MI->getOperand(0);
+ MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+ MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
+ MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ MCInstLowering.lowerOperand(MO_Sym, Sym);
+ MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+ MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);
+
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Adrp.addOperand(SymTLSDesc);
+ EmitToStreamer(OutStreamer, Adrp);
+
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Ldr.addOperand(SymTLSDescLo12);
+ Ldr.addOperand(MCOperand::CreateImm(0));
+ EmitToStreamer(OutStreamer, Ldr);
+
+ MCInst Add;
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(SymTLSDescLo12);
+ Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(OutStreamer, Add);
+
+ // Emit a relocation-annotation. This expands to no code, but requests
// the following instruction gets an R_AARCH64_TLSDESC_CALL.
MCInst TLSDescCall;
TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
TLSDescCall.addOperand(Sym);
EmitToStreamer(OutStreamer, TLSDescCall);
- // Other than that it's just a normal indirect call to the function loaded
- // from the descriptor.
- MCInst BLR;
- BLR.setOpcode(AArch64::BLR);
- BLR.addOperand(Callee);
- EmitToStreamer(OutStreamer, BLR);
+ MCInst Blr;
+ Blr.setOpcode(AArch64::BLR);
+ Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ EmitToStreamer(OutStreamer, Blr);
return;
}
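(Illustrative note, not part of the patch.) With the TLSDESC_CALLSEQ expansion above, a general-dynamic TLS access stays a single pseudo until the assembly printer, so the adrp/ldr/add/.tlsdesccall/blr group is always emitted back-to-back, as the linker-relaxation scheme expects. A minimal C sketch of code that takes this path when built as position-independent code for ELF:

/* Hypothetical example: a general-dynamic TLS access; the increment below
   is the kind of access lowered through the TLSDESC_CALLSEQ pseudo that the
   asm printer expands as shown above. */
__thread int tls_counter;

int bump(void) {
  return ++tls_counter;
}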
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 3b74481..06ff9af 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
- case AArch64::TLSDESC_BLR:
+ case AArch64::TLSDESC_CALLSEQ:
// Make sure it's a local dynamic access.
- if (!I->getOperand(1).isSymbol() ||
- strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ if (!I->getOperand(0).isSymbol() ||
+ strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
break;
if (TLSBaseAddrReg)
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 938dcb3..568f258 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -279,7 +279,7 @@ static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
/// definition. It also consider definitions of ADRP instructions as uses and
/// ignore other uses. The ADRPMode is used to collect the information for LHO
/// that involve ADRP operation only.
-static void initReachingDef(MachineFunction &MF,
+static void initReachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
BlockToSetOfInstrsPerColor &ReachableUses,
@@ -288,7 +288,7 @@ static void initReachingDef(MachineFunction &MF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned NbReg = RegToId.size();
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
auto &BBGen = Gen[&MBB];
BBGen = make_unique<const MachineInstr *[]>(NbReg);
std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
@@ -382,7 +382,7 @@ static void initReachingDef(MachineFunction &MF,
/// op.reachedUses
///
/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(MachineFunction &MF,
+static void reachingDefAlgorithm(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToSetOfInstrsPerColor &In,
BlockToSetOfInstrsPerColor &Out,
@@ -392,7 +392,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
bool HasChanged;
do {
HasChanged = false;
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
unsigned CurReg;
for (CurReg = 0; CurReg < NbReg; ++CurReg) {
SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
@@ -401,7 +401,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
unsigned Size = BBOutSet.size();
// In[bb][color] = U Out[bb.predecessors][color]
- for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+ for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
}
@@ -433,7 +433,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
/// @p DummyOp.
/// \pre ColorOpToReachedUses is an array of at least number of registers of
/// InstrToInstrs.
-static void reachingDef(MachineFunction &MF,
+static void reachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
const MapRegToId &RegToId, bool ADRPMode = false,
const MachineInstr *DummyOp = nullptr) {
@@ -983,7 +983,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
/// Look for every register defined by potential LOHs candidates.
/// Map these registers with dense id in @p RegToId and vice-versa in
/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
+static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
MapIdToReg &IdToReg,
const TargetRegisterInfo *TRI) {
unsigned CurRegId = 0;
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 61017c1..99cb641 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3158,7 +3158,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
@@ -4563,7 +4563,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
unsigned ResultReg = 0;
uint64_t ShiftVal = C->getZExtValue();
MVT SrcVT = RetVT;
- bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
+ bool IsZExt = I->getOpcode() != Instruction::AShr;
const Value *Op0 = I->getOperand(0);
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
if (!isIntExtFree(ZExt)) {
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ac11c4d..0a47dcb 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -65,7 +65,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
SDNode *SelectMLAV64LaneV128(SDNode *N);
@@ -211,13 +211,20 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
}
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- // Require the address to be in a register. That is safe for all AArch64
- // variants and it is hard to do anything much smarter without knowing
- // how the operand is used.
- OutOps.push_back(Op);
- return false;
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ // Require the address to be in a register. That is safe for all AArch64
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
}
/// SelectArithImmed - Select an immediate value that can be represented as
@@ -299,7 +306,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
-/// \brief Determine wether it is worth to fold V into an extended register.
+/// \brief Determine whether it is worth to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
// it hurts if the value is used at least twice, unless we are optimizing
// for code size.
@@ -1055,7 +1062,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
SDValue Ops[] = {N->getOperand(2), // Mem operand;
Chain};
- EVT ResTys[] = {MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
@@ -1077,8 +1084,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
N->getOperand(2), // Incremental
Chain};
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
@@ -1119,8 +1126,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
EVT VT = N->getOperand(2)->getValueType(0);
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Other}; // Type for the Chain
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other}; // Type for the Chain
// Form a REG_SEQUENCE to force register allocation.
bool Is128Bit = VT.getSizeInBits() == 128;
@@ -1136,6 +1143,7 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
return St;
}
+namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
@@ -1156,6 +1164,7 @@ public:
return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
}
};
+} // namespace
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
@@ -1184,7 +1193,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- EVT ResTys[] = {MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
@@ -1224,8 +1233,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
@@ -1309,8 +1318,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Other};
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index a1b324e..0c0e856 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
+ cl::desc("Allow AArch64 SLI/SRI formation"),
+ cl::init(false));
+
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+ "aarch64-elf-ldtls-generation", cl::Hidden,
+ cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+ cl::init(false));
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
@@ -362,9 +370,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
- for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
- MVT Ty = RoundingTypes[I];
+ for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -561,9 +567,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 };
- for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
- MVT Ty = RoundingVecTypes[I];
+ for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -752,7 +756,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+ case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -811,6 +815,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
+ case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
+ case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
+ case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
+ case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
+ case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
+ case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
@@ -1247,7 +1257,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
- bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false;
+ bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// For a 32 bit multiply with overflow check we want the instruction
@@ -2784,13 +2794,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
- Mask = TRI->getThisReturnPreservedMask(CallConv);
+ Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
- Mask = TRI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = TRI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3027,58 +3037,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
-///
-/// Thus, the ideal call sequence on AArch64 is:
-///
-/// adrp x0, :tlsdesc:thread_var
-/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
-/// add x0, x0, :tlsdesc_lo12:thread_var
-/// .tlsdesccall thread_var
-/// blr x8
-/// (TPIDR_EL0 offset now in x0).
+/// is a function pointer to carry out the resolution.
///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
+/// The sequence is:
+/// adrp x0, :tlsdesc:var
+/// ldr x1, [x0, #:tlsdesc_lo12:var]
+/// add x0, x0, #:tlsdesc_lo12:var
+/// .tlsdesccall var
+/// blr x1
+/// (TPIDR_EL0 offset now in x0)
///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
- SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const {
+/// The above sequence must be produced unscheduled, to enable the linker to
+/// optimize/relax this sequence.
+/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+/// above sequence, and expanded really late in the compilation flow, to ensure
+/// the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- // The function we need to call is simply the first entry in the GOT for this
- // descriptor, load it in preparation.
- SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
-
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
- // silly).
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
-
- // The function takes only one argument: the address of the descriptor itself
- // in X0.
- SDValue Glue, Chain;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
- Glue = Chain.getValue(1);
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- // We're now ready to populate the argument list, as with a normal call:
- SmallVector<SDValue, 6> Ops;
+ SmallVector<SDValue, 2> Ops;
Ops.push_back(Chain);
- Ops.push_back(Func);
Ops.push_back(SymAddr);
- Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
- Ops.push_back(DAG.getRegisterMask(Mask));
- Ops.push_back(Glue);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
- Glue = Chain.getValue(1);
+ Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+ SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
@@ -3089,9 +3075,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
"ELF TLS only supported in small memory model");
+ // Different choices can be made for the maximum size of the TLS area for a
+ // module. For the small address model, the default TLS size is 16MiB and the
+ // maximum TLS size is 4GiB.
+ // FIXME: add -mtls-size command line option and make it control the 16MiB
+ // vs. 4GiB code sequence generation.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+ if (Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+ }
SDValue TPOff;
EVT PtrVT = getPointerTy();
@@ -3102,17 +3097,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::LocalExec) {
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
+ SDValue TPWithOff_lo =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ HiVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ SDValue TPWithOff =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+ LoVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3127,19 +3125,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3148,40 +3133,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
-
- SDValue DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
-
- TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
- } else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3189,7 +3157,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
@@ -3356,11 +3324,12 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
EVT VecVT;
EVT EltVT;
- SDValue EltMask, VecVal1, VecVal2;
+ uint64_t EltMask;
+ SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
VecVT = MVT::v4i32;
- EltMask = DAG.getConstant(0x80000000ULL, EltVT);
+ EltMask = 0x80000000ULL;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
@@ -3378,7 +3347,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
// We want to materialize a mask with the the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = DAG.getConstant(0, EltVT);
+ EltMask = 0;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
@@ -3393,11 +3362,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
llvm_unreachable("Invalid type for copysign!");
}
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
- BuildVectorOps.push_back(EltMask);
-
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
+ SDValue BuildVec = DAG.getConstant(EltMask, VecVT);
// If we couldn't materialize the mask above, then the mask vector will be
// the zero vector, and we need to negate it here.
@@ -5927,8 +5892,10 @@ FailedModImm:
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
- MVT NewType =
- (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ EVT EltTy = VT.getVectorElementType();
+ assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
+ "Unsupported floating-point vector type");
+ MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
@@ -6781,7 +6748,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free so return true for one or fewer MOVK.
- return (Shift < 3) ? true : false;
+ return Shift < 3;
}
// Generate SUBS and CSEL for integer abs.
@@ -6898,6 +6865,15 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
N->getOperand(0));
}
} else {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ APInt VNP1 = -Value + 1;
+ if (VNP1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VNP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+ ShiftedVal);
+ }
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
APInt VNM1 = -Value - 1;
if (VNM1.isPowerOf2()) {
@@ -6908,15 +6884,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
}
- // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
- APInt VNP1 = -Value + 1;
- if (VNP1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VNP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
- ShiftedVal);
- }
}
}
return SDValue();
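(Illustrative note, not part of the patch.) Moving the -(2^N - 1) case ahead of the -(2^N + 1) case means a multiplier such as -3, which matches both forms, now takes the shorter shl/sub expansion. A small C sketch of the identity the combine relies on (ignoring signed-overflow corner cases):

/* For N = 3: -(2^3 - 1) = -7, and x * -7 == x - (x << 3). */
long mul_by_minus7(long x)          { return x * -7; }
long mul_by_minus7_combined(long x) { return x - (x << 3); }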
@@ -7211,21 +7178,54 @@ static SDValue performBitcastCombine(SDNode *N,
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // Optimize concat_vectors of truncated vectors, where the intermediate
+ // type is illegal, to avoid said illegality, e.g.,
+ // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
+ // (v2i16 (truncate (v2i64)))))
+ // ->
+ // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
+ // (v4i32 (bitcast (v2i64))),
+ // <0, 2, 4, 6>)))
+ // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
+ // on both input and result type, so we might generate worse code.
+ // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
+ if (N->getNumOperands() == 2 &&
+ N0->getOpcode() == ISD::TRUNCATE &&
+ N1->getOpcode() == ISD::TRUNCATE) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N10 = N1->getOperand(0);
+ EVT N00VT = N00.getValueType();
+
+ if (N00VT == N10.getValueType() &&
+ (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
+ N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
+ MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
+ SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
+ for (size_t i = 0; i < Mask.size(); ++i)
+ Mask[i] = i * 2;
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getVectorShuffle(
+ MidVT, dl,
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
+ }
+ }
+
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
- if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
+ if (N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getVectorElementType().getSizeInBits() == 64);
- return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT,
- WidenVector(N->getOperand(0), DAG),
+ return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, MVT::i64));
}
@@ -7238,10 +7238,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
- SDValue Op1 = N->getOperand(1);
- if (Op1->getOpcode() != ISD::BITCAST)
+ if (N1->getOpcode() != ISD::BITCAST)
return SDValue();
- SDValue RHS = Op1->getOperand(0);
+ SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
@@ -7251,10 +7250,10 @@ static SDValue performConcatVectorsCombine(SDNode *N,
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
- return DAG.getNode(
- ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
- DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+ DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
+ RHS));
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
@@ -7651,6 +7650,15 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
+static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
+ SelectionDAG &DAG) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
+ DAG.getNode(Opc, SDLoc(N),
+ N->getOperand(1).getSimpleValueType(),
+ N->getOperand(1)),
+ DAG.getConstant(0, MVT::i64));
+}
+
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -7663,6 +7671,18 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
break;
+ case Intrinsic::aarch64_neon_saddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
+ case Intrinsic::aarch64_neon_uaddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
+ case Intrinsic::aarch64_neon_sminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
+ case Intrinsic::aarch64_neon_uminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
+ case Intrinsic::aarch64_neon_smaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
+ case Intrinsic::aarch64_neon_umaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
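(Illustrative note, not part of the patch.) The new SADDV/UADDV/SMINV/UMINV/SMAXV/UMAXV nodes let the across-lanes intrinsics be matched through a common vector-typed node whose low lane carries the result. A hedged C sketch of source that reaches these intrinsics via the ACLE across-lanes builtins:

#include <arm_neon.h>

/* vaddvq_u32 lowers to llvm.aarch64.neon.uaddv and vmaxvq_s16 to
   llvm.aarch64.neon.smaxv; both are rewritten by
   combineAcrossLanesIntrinsic above. */
uint32_t sum_lanes(uint32x4_t v) { return vaddvq_u32(v); }
int16_t  max_lanes(int16x8_t v)  { return vmaxvq_s16(v); }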
@@ -8792,9 +8812,11 @@ bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128;
+ return Size <= 128 ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
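(Illustrative note, not part of the patch.) Returning AtomicRMWExpansionKind::LLSC keeps the old behaviour: atomicrmw operations of 128 bits or less are still expanded in IR to a load-linked/store-conditional (ldxr/stxr) loop; the enum merely replaces the previous bool. A hedged C sketch of an operation that takes this path:

long fetch_add(long *p, long v) {
  /* On AArch64 this read-modify-write is expanded to an ldxr/stxr retry loop. */
  return __atomic_fetch_add(p, v, __ATOMIC_SEQ_CST);
}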
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index e973364..5ff11e8 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -30,9 +30,9 @@ enum {
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
- // Almost the same as a normal call node, except that a TLSDesc relocation is
- // needed so the linker can relax it correctly if possible.
- TLSDESC_CALL,
+ // Produces the full sequence of instructions for getting the thread pointer
+ // offset of a variable into X0, using the TLSDesc model.
+ TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
@@ -141,6 +141,18 @@ enum {
FCMLEz,
FCMLTz,
+ // Vector across-lanes addition
+ // Only the lower result lane is defined.
+ SADDV,
+ UADDV,
+
+ // Vector across-lanes min/max
+ // Only the lower result lane is defined.
+ SMINV,
+ UMINV,
+ SMAXV,
+ UMAXV,
+
// Vector bitwise negation
NOT,
@@ -335,7 +347,8 @@ public:
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
@@ -399,8 +412,8 @@ private:
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const;
+ SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -460,6 +473,16 @@ private:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
+ // followed by llvm_unreachable so we'll leave them unimplemented in
+ // the backend for now.
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
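(Illustrative note, not part of the patch.) The new getInlineAsmMemConstraint hook maps the "Q" inline-assembly memory constraint (an address held in a single base register, no offset) so that it reaches SelectInlineAsmMemoryOperand. A hedged C sketch of inline assembly that uses it:

static inline int load_acquire(const int *p) {
  int v;
  /* "Q" forces a [base-register] memory operand, which is what ldar requires. */
  __asm__ volatile("ldar %w0, %1" : "=r"(v) : "Q"(*p) : "memory");
  return v;
}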
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 64cec55..8e0af2d 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
- RI(this, &STI), Subtarget(STI) {}
+ RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
@@ -2068,10 +2068,10 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.setMIFlag(Flag);
}
-MachineInstr *
-AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// This is a bit of a hack. Consider this instruction:
//
// %vreg0<def> = COPY %SP; GPR64all:%vreg0
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index d8f1274..fa4b8b7 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -129,10 +129,9 @@ public:
const TargetRegisterInfo *TRI) const override;
using TargetInstrInfo::foldMemoryOperandImpl;
- MachineInstr *
- foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 6e4c0b0..ec6fa5c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -22,6 +22,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
+ AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
@@ -96,6 +98,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+// adrp x0, :tlsdesc:var
+// ldr x1, [x0, #:tlsdesc_lo12:var]
+// add x0, x0, #:tlsdesc_lo12:var
+// .tlsdesccall var
+// blr x1
+
+// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
+// number of operands (the variable)
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+ [SDTCisPtrTy<0>]>;
+
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
@@ -229,10 +244,11 @@ def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
-def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
- SDT_AArch64TLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
@@ -244,6 +260,13 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
+def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
+def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
+def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
+def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
+def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
+def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -1049,15 +1072,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
let AsmString = ".tlsdesccall $sym";
}
-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
- : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
- [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+ isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+ (TLSDESC_CALLSEQ texternalsym:$sym)>;
-def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
- (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
@@ -2326,8 +2350,15 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
defm FMOV : UnscaledConversion<"fmov">;
-def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
-def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
+// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
+def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
+ Requires<[NoZCZ]>;
+def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
+ Requires<[NoZCZ]>;
+}
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
@@ -3416,10 +3447,10 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
-def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
-def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
+def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
+def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
@@ -3709,10 +3740,6 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
-defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
-defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>;
// Floating point vector extractions are codegen'd as either a sequence of
@@ -3776,121 +3803,143 @@ defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
-multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
- def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (SMOVvi8to32
+// Patterns for across-vector intrinsics, that have a node equivalent, that
+// returns a vector (with only the low lane defined) instead of a scalar.
+// In effect, opNode is the same as (scalar_to_vector (IntNode)).
+multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
+ SDPatternOperator opNode> {
+// If a lane instruction caught the vector_extract around opNode, we can
+// directly match the latter to the instruction.
+def : Pat<(v8i8 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
+def : Pat<(v16i8 (opNode V128:$Rn)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
+def : Pat<(v4i16 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
+def : Pat<(v8i16 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
+def : Pat<(v4i32 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
+
+
+// If none did, fall back to the explicit patterns, consuming the
+// vector_extract.
+def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
+ (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (insert_subvector undef,
+ (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
+ ssub), ssub)>;
+
+}
+
+multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it as smov already
// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
(i64 0)))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
(i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
}
-multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
- def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
+multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- ssub))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
+ maski8_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
ssub))>;
-
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
ssub))>;
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
+ maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
ssub))>;
+}
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
-}
+defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
+
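+// Likewise, the v2i32 min/max reductions have no across-lanes instruction
+// form, so they use the corresponding pairwise instruction with Vn == Vm.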
+defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
+def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
+ (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
+def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
+ (SMINPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
+def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
+ (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
+def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
+ (UMINPv2i32 V64:$Rn, V64:$Rn)>;
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
@@ -3953,32 +4002,6 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
dsub))>;
}
-defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>;
-def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>;
-def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>;
-def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>;
-def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8463ce6..b1499e2 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -63,16 +63,24 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
+ // \p SExtIdx[out] gives the index of the result of the load pair that
+ // must be extended. The value of SExtIdx assumes that the paired load
+ // produces the value in this order: (I, returned iterator), i.e.,
+ // -1 means no value has to be extended, 0 means I, and 1 means the
+ // returned iterator.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward,
+ bool &MergeForward, int &SExtIdx,
unsigned Limit);
// Merge the two instructions indicated into a single pair-wise instruction.
// If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
+ // \p SExtIdx index of the result that must be extended for a paired load.
+ // -1 means none, 0 means I, and 1 means Paired.
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool MergeForward);
+ MachineBasicBlock::iterator Paired, bool MergeForward,
+ int SExtIdx);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -181,6 +189,43 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
}
}
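+// Map a sign-extending load (LDRSW/LDURSW) to the equivalent plain load;
+// other pairable load/store opcodes map to themselves, and anything else is
+// reported as invalid via \p IsValidLdStrOpc.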
+static unsigned getMatchingNonSExtOpcode(unsigned Opc,
+ bool *IsValidLdStrOpc = nullptr) {
+ if (IsValidLdStrOpc)
+ *IsValidLdStrOpc = true;
+ switch (Opc) {
+ default:
+ if (IsValidLdStrOpc)
+ *IsValidLdStrOpc = false;
+ return UINT_MAX;
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ case AArch64::STRWui:
+ case AArch64::STURWi:
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ case AArch64::LDRDui:
+ case AArch64::LDURDi:
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ case AArch64::STRSui:
+ case AArch64::STURSi:
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
+ return Opc;
+ case AArch64::LDRSWui:
+ return AArch64::LDRWui;
+ case AArch64::LDURSWi:
+ return AArch64::LDURWi;
+ }
+}
+
static unsigned getMatchingPairOpcode(unsigned Opc) {
switch (Opc) {
default:
@@ -282,7 +327,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool MergeForward) {
+ bool MergeForward, int SExtIdx) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -292,11 +337,13 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (NextI == Paired)
++NextI;
- bool IsUnscaled = isUnscaledLdst(I->getOpcode());
+ unsigned Opc =
+ SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
+ bool IsUnscaled = isUnscaledLdst(Opc);
int OffsetStride =
IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
- unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
+ unsigned NewOpc = getMatchingPairOpcode(Opc);
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
@@ -311,6 +358,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
Paired->getOperand(2).getImm() + OffsetStride) {
RtMI = Paired;
Rt2MI = I;
+ // Here we swapped the assumption made for SExtIdx.
+ // I.e., we turn ldp I, Paired into ldp Paired, I.
+ // Update the index accordingly.
+ if (SExtIdx != -1)
+ SExtIdx = (SExtIdx + 1) % 2;
} else {
RtMI = I;
Rt2MI = Paired;
@@ -337,8 +389,47 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
DEBUG(dbgs() << " ");
DEBUG(Paired->print(dbgs()));
DEBUG(dbgs() << " with instruction:\n ");
- DEBUG(((MachineInstr *)MIB)->print(dbgs()));
- DEBUG(dbgs() << "\n");
+
+ if (SExtIdx != -1) {
+ // Generate the sign extension for the proper result of the ldp.
+ // I.e., with X1, that would be:
+ // %W1<def> = KILL %W1, %X1<imp-def>
+ // %X1<def> = SBFMXri %X1<kill>, 0, 31
+ MachineOperand &DstMO = MIB->getOperand(SExtIdx);
+ // Right now, DstMO has the extended register, since it comes from an
+ // extended opcode.
+ unsigned DstRegX = DstMO.getReg();
+ // Get the W variant of that register.
+ unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
+ // Update the result of LDP to use the W instead of the X variant.
+ DstMO.setReg(DstRegW);
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ // Make the machine verifier happy by providing a definition for
+ // the X register.
+ // Insert this definition right after the generated LDP, i.e., before
+ // InsertionPoint.
+ MachineInstrBuilder MIBKill =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(TargetOpcode::KILL), DstRegW)
+ .addReg(DstRegW)
+ .addReg(DstRegX, RegState::Define);
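+    // Mark the X-register def as implicit so the KILL takes the form shown
+    // above (%Wn<def> = KILL %Wn, %Xn<imp-def>).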
+ MIBKill->getOperand(2).setImplicit();
+ // Create the sign extension.
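+    // (SBFMXri Xd, Xn, #0, #31 is the canonical form of SXTW Xd, Wn.)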
+ MachineInstrBuilder MIBSXTW =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::SBFMXri), DstRegX)
+ .addReg(DstRegX)
+ .addImm(0)
+ .addImm(31);
+ (void)MIBSXTW;
+ DEBUG(dbgs() << " Extend operand:\n ");
+ DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ } else {
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
// Erase the old instructions.
I->eraseFromParent();
@@ -396,7 +487,8 @@ static int alignTo(int Num, int PowOf2) {
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward, unsigned Limit) {
+ bool &MergeForward, int &SExtIdx,
+ unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
@@ -436,7 +528,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Now that we know this is a real instruction, count it.
++Count;
- if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
+ bool CanMergeOpc = Opc == MI->getOpcode();
+ SExtIdx = -1;
+ if (!CanMergeOpc) {
+ bool IsValidLdStrOpc;
+ unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
+ if (!IsValidLdStrOpc)
+ continue;
+ // Opc will be the first instruction in the pair.
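+      // If Opc already equals its non-sext variant, the sign-extending load
+      // must be the matched instruction (index 1); otherwise it is FirstMI
+      // itself (index 0).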
+ SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0;
+ CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
+ }
+
+ if (CanMergeOpc && MI->getOperand(2).isImm()) {
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
@@ -823,13 +927,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
}
// Look ahead up to ScanLimit instructions for a pairable instruction.
bool MergeForward = false;
+ int SExtIdx = -1;
MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, MergeForward, ScanLimit);
+ findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
if (Paired != E) {
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
Modified = true;
++NumPairCreated;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index e57b0f4..b829341 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -22,9 +22,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration;
+
AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
@@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
Model = Printer.TM.getTLSModel(GV);
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration &&
+ Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+
} else {
assert(MO.isSymbol() &&
StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
"unexpected external TLS symbol");
+ // The general dynamic access sequence is used to get the
+ // address of _TLS_MODULE_BASE_.
Model = TLSModel::GeneralDynamic;
}
switch (Model) {
@@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
RefFlags |= AArch64MCExpr::VK_G1;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
RefFlags |= AArch64MCExpr::VK_G0;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12)
+ RefFlags |= AArch64MCExpr::VK_HI12;
if (MO.getTargetFlags() & AArch64II::MO_NC)
RefFlags |= AArch64MCExpr::VK_NC;
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index 4690177..5394875 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -319,7 +319,7 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
const MachineInstr &MI) {
- LiveInterval LI = LIs.getInterval(reg);
+ const LiveInterval &LI = LIs.getInterval(reg);
SlotIndex SI = LIs.getInstructionIndex(&MI);
return LI.expiredAt(SI);
}
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index c037c86..e1b93bf 100644
--- a/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -38,6 +38,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -189,9 +190,11 @@ private:
IPI->second.push_back(&Use);
// Transfer the dominated uses of IPI to NewPt
// Inserting into the DenseMap may invalidate existing iterator.
- // Keep a copy of the key to find the iterator to erase.
+ // Keep a copy of the key to find the iterator to erase. Keep a copy of the
+ // value so that we don't have to dereference IPI->second.
Instruction *OldInstr = IPI->first;
- InsertPts[NewPt] = std::move(IPI->second);
+ Uses OldUses = std::move(IPI->second);
+ InsertPts[NewPt] = std::move(OldUses);
// Erase IPI.
InsertPts.erase(OldInstr);
}
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 206cdbb..33c11fe 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -37,9 +38,8 @@ static cl::opt<bool>
ReserveX18("aarch64-reserve-x18", cl::Hidden,
cl::desc("Reserve X18, making it unavailable as GPR"));
-AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
- const AArch64Subtarget *sti)
- : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
+AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
+ : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {}
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -55,7 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
const uint32_t *
-AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
return CSR_AArch64_NoRegs_RegMask;
@@ -66,15 +67,16 @@ AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
- if (STI->isTargetDarwin())
+ if (TT.isOSDarwin())
return CSR_AArch64_TLS_Darwin_RegMask;
- assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
+ assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS");
return CSR_AArch64_TLS_ELF_RegMask;
}
const uint32_t *
-AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i64 argument (which must also be the register used to return a
@@ -97,12 +99,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AArch64::WSP);
Reserved.set(AArch64::WZR);
- if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
+ if (TFI->hasFP(MF) || TT.isOSDarwin()) {
Reserved.set(AArch64::FP);
Reserved.set(AArch64::W29);
}
- if (STI->isTargetDarwin() || ReserveX18) {
+ if (TT.isOSDarwin() || ReserveX18) {
Reserved.set(AArch64::X18); // Platform register
Reserved.set(AArch64::W18);
}
@@ -129,10 +131,10 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case AArch64::X18:
case AArch64::W18:
- return STI->isTargetDarwin() || ReserveX18;
+ return TT.isOSDarwin() || ReserveX18;
case AArch64::FP:
case AArch64::W29:
- return TFI->hasFP(MF) || STI->isTargetDarwin();
+ return TFI->hasFP(MF) || TT.isOSDarwin();
case AArch64::W19:
case AArch64::X19:
return hasBasePointer(MF);
@@ -269,7 +271,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
- if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
+ if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset))
return false;
// If we can reference via the stack pointer or base pointer, try that.
@@ -277,7 +279,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (isFrameOffsetLegal(MI, Offset))
+ if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
return false;
// The offset likely isn't legal; we want to allocate a virtual base register.
@@ -285,6 +287,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
}
bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
assert(Offset <= INT_MAX && "Offset too big to fit in int.");
assert(MI && "Unable to get the legal offset for nil instruction.");
@@ -302,10 +305,11 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
DebugLoc DL; // Defaults to "unknown"
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
-
+ const MachineFunction &MF = *MBB->getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const MachineFunction &MF = *MBB->getParent();
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
@@ -324,6 +328,9 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const AArch64InstrInfo *TII =
+ MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
assert(Done && "Unable to resolve frame index!");
(void)Done;
@@ -337,6 +344,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
MF.getSubtarget().getFrameLowering());
@@ -389,10 +398,10 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64RegClassID:
case AArch64::GPR32commonRegClassID:
case AArch64::GPR64commonRegClassID:
- return 32 - 1 // XZR/SP
- - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
- - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register
- - hasBasePointer(MF); // X19
+ return 32 - 1 // XZR/SP
+ - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
+ - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register
+ - hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
case AArch64::FPR32RegClassID:
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index 51a5034..c01bfa5 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -19,26 +19,24 @@
namespace llvm {
-class AArch64InstrInfo;
-class AArch64Subtarget;
class MachineFunction;
class RegScavenger;
class TargetRegisterClass;
+class Triple;
struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
private:
- const AArch64InstrInfo *TII;
- const AArch64Subtarget *STI;
+ const Triple &TT;
public:
- AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti);
+ AArch64RegisterInfo(const Triple &TT);
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
unsigned getCSRFirstUseCost() const override {
// The cost will be compared against BlockFrequency where entry has the
@@ -59,7 +57,8 @@ public:
///
/// Should return NULL in the case that the calling convention does not have
/// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass *
@@ -73,7 +72,7 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
int FrameIdx,
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index c613025..221d70d 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -48,7 +48,7 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS)),
TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index d418cc5..bcab97d 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -41,6 +41,7 @@ protected:
bool HasNEON;
bool HasCrypto;
bool HasCRC;
+ bool HasV8_1a;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove;
@@ -86,6 +87,7 @@ public:
const AArch64RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
+ const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostMachineScheduler() const override {
return isCortexA53() || isCortexA57();
@@ -99,6 +101,7 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ bool hasV8_1a() const { return HasV8_1a; }
bool isLittleEndian() const { return IsLittle; }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index d73d0b3..f902f64 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -104,6 +104,16 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<AArch64_ELFTargetObjectFile>();
}
+// Helper function to build a DataLayout string
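+// ("e"/"E" selects little/big-endian, "m:o"/"m:e" selects Mach-O/ELF name
+// mangling, i64:64/i128:128 give the type alignments, n32:64 the native
+// integer widths, and S128 the 128-bit natural stack alignment.)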
+static std::string computeDataLayout(StringRef TT, bool LittleEndian) {
+ Triple Triple(TT);
+ if (Triple.isOSBinFormatMachO())
+ return "e-m:o-i64:64-i128:128-n32:64-S128";
+ if (LittleEndian)
+ return "e-m:e-i64:64-i128:128-n32:64-S128";
+ return "E-m:e-i64:64-i128:128-n32:64-S128";
+}
+
/// TargetMachine ctor - Create an AArch64 architecture model.
///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
@@ -112,16 +122,12 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool LittleEndian)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- // This nested ternary is horrible, but DL needs to be properly
- // initialized
- // before TLInfo is constructed.
- DL(Triple(TT).isOSBinFormatMachO()
- ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
- : "E-m:e-i64:64-i128:128-n32:64-S128")),
+      // The DataLayout string is computed up front (see computeDataLayout)
+      // so it can be handed straight to the LLVMTargetMachine constructor.
+ : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) {
+ isLittle(LittleEndian) {
initAsmInfo();
}
@@ -239,7 +245,7 @@ bool AArch64PassConfig::addPreISel() {
// FIXME: On AArch64, this depends on the type.
// Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
// and the offset has to be a multiple of the related size in bytes.
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGlobalMergePass(TM, 4095));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
@@ -287,10 +293,7 @@ void AArch64PassConfig::addPostRegAlloc() {
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
- if (TM->getOptLevel() != CodeGenOpt::None &&
- (TM->getSubtarget<AArch64Subtarget>().isCortexA53() ||
- TM->getSubtarget<AArch64Subtarget>().isCortexA57()) &&
- usingDefaultRegAlloc())
+ if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 7143adf..ec34fad 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -23,9 +23,7 @@ namespace llvm {
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
- const DataLayout DL;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AArch64Subtarget Subtarget;
mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
public:
@@ -35,11 +33,6 @@ public:
CodeGenOpt::Level OL, bool IsLittleEndian);
~AArch64TargetMachine() override;
-
- const DataLayout *getDataLayout() const override { return &DL; }
- const AArch64Subtarget *getSubtargetImpl() const override {
- return &Subtarget;
- }
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 4069038..8ff58e9 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;
@@ -23,6 +24,11 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
InitializeELF(TM.Options.UseInitArray);
}
+AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
+ : TargetLoweringObjectFileMachO() {
+ SupportGOTPCRelWithOffset = false;
+}
+
const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
@@ -50,3 +56,18 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol(
MachineModuleInfo *MMI) const {
return TM.getSymbol(GV, Mang);
}
+
+const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+  assert((Offset + MV.getConstant() == 0) &&
+         "AArch64 does not support GOT PC rel with extra offset");
+ // On ARM64 Darwin, we can reference symbols with foo@GOT-., which
+ // is an indirect pc-relative reference.
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
+ MCSymbol *PCSym = getContext().CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Res, PC, getContext());
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 2e595f9..d41f445 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -24,6 +24,8 @@ class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
+ AArch64_MachoTargetObjectFile();
+
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
unsigned Encoding, Mangler &Mang,
const TargetMachine &TM,
@@ -33,6 +35,11 @@ public:
MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
+
+ const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0646d85..0533355 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -10,6 +10,7 @@
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
@@ -352,7 +353,7 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// We don't lower vector selects well that are wider than the register width.
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
- unsigned AmortizationCost = 20;
+ const unsigned AmortizationCost = 20;
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
VectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
@@ -426,6 +427,15 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor() {
void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
TTI::UnrollingPreferences &UP) {
+ // Enable partial unrolling and runtime unrolling.
+ BaseT::getUnrollingPreferences(L, UP);
+
+  // Inner loops are more likely to be hot, and their runtime checks can be
+  // hoisted out by the LICM pass, so the overhead is lower; use a larger
+  // threshold to unroll more of them.
+ if (L->getLoopDepth() > 1)
+ UP.PartialThreshold *= 2;
+
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
}
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 1960c99..1219ffc 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -113,11 +113,10 @@ public:
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "AArch64GenAsmMatcher.inc"
};
- AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI) {
- MCAsmParserExtension::Initialize(_Parser);
+ AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI) {
+ MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
new AArch64TargetStreamer(S);
@@ -205,6 +204,8 @@ private:
struct BarrierOp {
unsigned Val; // Not the enum since not all values have names.
+ const char *Data;
+ unsigned Length;
};
struct SysRegOp {
@@ -221,6 +222,8 @@ private:
struct PrefetchOp {
unsigned Val;
+ const char *Data;
+ unsigned Length;
};
struct ShiftExtendOp {
@@ -254,8 +257,7 @@ private:
MCContext &Ctx;
public:
- AArch64Operand(KindTy K, MCContext &_Ctx)
- : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
+ AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {}
AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
Kind = o.Kind;
@@ -349,6 +351,11 @@ public:
return Barrier.Val;
}
+ StringRef getBarrierName() const {
+ assert(Kind == k_Barrier && "Invalid access!");
+ return StringRef(Barrier.Data, Barrier.Length);
+ }
+
unsigned getReg() const override {
assert(Kind == k_Register && "Invalid access!");
return Reg.RegNum;
@@ -384,6 +391,11 @@ public:
return Prefetch.Val;
}
+ StringRef getPrefetchName() const {
+ assert(Kind == k_Prefetch && "Invalid access!");
+ return StringRef(Prefetch.Data, Prefetch.Length);
+ }
+
AArch64_AM::ShiftExtendType getShiftExtendType() const {
assert(Kind == k_ShiftExtend && "Invalid access!");
return ShiftExtend.Type;
@@ -752,58 +764,47 @@ public:
}
bool isMovZSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovZSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
- AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
+ AArch64MCExpr::VK_TPREL_G2,
+ AArch64MCExpr::VK_DTPREL_G2});
}
bool isMovZSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
+ return isMovWSymbol({
+ AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1,
AArch64MCExpr::VK_DTPREL_G1,
- };
- return isMovWSymbol(Variants);
+ });
}
bool isMovZSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
- AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
+ AArch64MCExpr::VK_TPREL_G0,
+ AArch64MCExpr::VK_DTPREL_G0});
}
bool isMovKSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovKSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2_NC};
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC);
}
bool isMovKSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC,
- AArch64MCExpr::VK_DTPREL_G1_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC,
+ AArch64MCExpr::VK_TPREL_G1_NC,
+ AArch64MCExpr::VK_DTPREL_G1_NC});
}
bool isMovKSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
- AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(
+ {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC});
}
template<int RegWidth, int Shift>
@@ -1608,10 +1609,14 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
+ Op->Barrier.Data = Str.data();
+ Op->Barrier.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1642,10 +1647,14 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
+    Op->Prefetch.Data = Str.data();
+    Op->Prefetch.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1673,9 +1682,8 @@ void AArch64Operand::print(raw_ostream &OS) const {
<< AArch64_AM::getFPImmFloat(getFPImm()) << ") >";
break;
case k_Barrier: {
- bool Valid;
- StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid);
- if (Valid)
+ StringRef Name = getBarrierName();
+ if (!Name.empty())
OS << "<barrier " << Name << ">";
else
OS << "<barrier invalid #" << getBarrier() << ">";
@@ -1718,9 +1726,8 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "c" << getSysCR();
break;
case k_Prefetch: {
- bool Valid;
- StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid);
- if (Valid)
+ StringRef Name = getPrefetchName();
+ if (!Name.empty())
OS << "<prfop " << Name << ">";
else
OS << "<prfop invalid #" << getPrefetch() << ">";
@@ -1963,7 +1970,11 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ bool Valid;
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name,
+ S, getContext()));
return MatchOperand_Success;
}
@@ -1973,14 +1984,16 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
}
bool Valid;
- unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ unsigned prfop = Mapper.fromString(Tok.getString(), Valid);
if (!Valid) {
TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat identifier token.
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Tok.getString(),
+ S, getContext()));
return MatchOperand_Success;
}
@@ -2582,8 +2595,11 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
Error(ExprLoc, "barrier operand out of range");
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext()));
+ bool Valid;
+ auto Mapper = AArch64DB::DBarrierMapper();
+ StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+  Operands.push_back(AArch64Operand::CreateBarrier(MCE->getValue(), Name,
+                                                   ExprLoc, getContext()));
return MatchOperand_Success;
}
@@ -2593,7 +2609,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
}
bool Valid;
- unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64DB::DBarrierMapper();
+ unsigned Opt = Mapper.fromString(Tok.getString(), Valid);
if (!Valid) {
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
@@ -2605,8 +2622,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(Opt, getLoc(), getContext()));
+  Operands.push_back(AArch64Operand::CreateBarrier(Opt, Tok.getString(),
+                                                   getLoc(), getContext()));
Parser.Lex(); // Consume the option
return MatchOperand_Success;
@@ -2631,8 +2648,8 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
assert(IsKnown == (MSRReg != -1U) &&
"register should be -1 if and only if it's unknown");
- uint32_t PStateField =
- AArch64PState::PStateMapper().fromString(Tok.getString(), IsKnown);
+ auto PStateMapper = AArch64PState::PStateMapper();
+ uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown);
assert(IsKnown == (PStateField != -1U) &&
"register should be -1 if and only if it's unknown");
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 423da65..84b63a0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -493,14 +494,28 @@ void ELFAArch64AsmBackend::processFixupValue(
IsResolved = false;
}
+// Returns whether this fixup is based on an address in the .eh_frame section,
+// and therefore should be byte swapped.
+// FIXME: Should be replaced with something more principled.
+static bool isByteSwappedFixup(const MCExpr *E) {
+ MCValue Val;
+ if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr))
+ return false;
+
+ if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined())
+ return false;
+
+  const MCSectionELF *SecELF =
+      dyn_cast<MCSectionELF>(&Val.getSymA()->getSymbol().getSection());
+  return SecELF && SecELF->getSectionName() == ".eh_frame";
+}
+
void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
// store fixups in .eh_frame section in big endian order
if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) {
- const MCSection *Sec = Fixup.getValue()->FindAssociatedSection();
- const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec);
- if (SecELF && SecELF->getSectionName() == ".eh_frame")
+ if (isByteSwappedFixup(Fixup.getValue()))
Value = ByteSwap_32(unsigned(Value));
}
AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 8dc6c30..8f780d2 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -203,24 +203,27 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
}
namespace llvm {
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new AArch64TargetAsmStreamer(*S, OS);
- return S;
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new AArch64TargetAsmStreamer(S, OS);
}
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll) {
AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
- new AArch64TargetELFStreamer(*S);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
}
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new AArch64TargetELFStreamer(S);
+ return nullptr;
+}
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 4756a19..9ea49f0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -38,9 +38,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
- AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : Ctx(ctx) {}
+ AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {}
~AArch64MCCodeEmitter() {}
@@ -205,9 +203,8 @@ public:
MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new AArch64MCCodeEmitter(MCII, STI, Ctx);
+ return new AArch64MCCodeEmitter(MCII, Ctx);
}
/// getMachineOpValue - Return binary encoding of operand. If the machine
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index e396df8..9e31508 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/ELF.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 0f7a6b8..38b399d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -123,94 +123,61 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
return nullptr;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
- /*LabelSections*/ true);
-
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
}
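+// Thin wrapper over the generic Mach-O streamer that forces LabelSections on
+// for AArch64 Darwin.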
+static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool DWARFMustBeAtTheEnd) {
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+ DWARFMustBeAtTheEnd,
+ /*LabelSections*/ true);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeAArch64TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
- createAArch64MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARM64Target,
- createAArch64MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARM64Target,
- createAArch64MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
- createAArch64MCSubtargetInfo);
+ for (Target *T :
+ {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAArch64MCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createAArch64MCCodeEmitter);
+
+ // Register the obj streamers.
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(
+ *T, createAArch64ObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T,
+ createAArch64AsmTargetStreamer);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createAArch64MCInstPrinter);
+ }
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget,
- createAArch64leAsmBackend);
+ for (Target *T : {&TheAArch64leTarget, &TheARM64Target})
+ TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
createAArch64beAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARM64Target,
- createAArch64leAsmBackend);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
- createAArch64MCCodeEmitter);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheARM64Target,
- createAArch64MCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
- createAArch64MCInstPrinter);
}
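
The hunk above collapses the per-target registration boilerplate into a single range-for over the three Target definitions that share every MC component. The standalone sketch below is not part of the patch; it uses placeholder Target and Registry types (named Target, Registry, RegisterMCAsmInfo here purely for illustration) instead of the real llvm::TargetRegistry, and only demonstrates the pattern of registering one factory for several targets in a loop:

    #include <functional>
    #include <initializer_list>
    #include <iostream>
    #include <map>
    #include <string>

    // Placeholder stand-ins for llvm::Target and llvm::TargetRegistry,
    // for illustration only.
    struct Target { std::string Name; };

    struct Registry {
      std::map<std::string, std::function<std::string(const Target &)>> AsmInfo;
      void RegisterMCAsmInfo(const Target &T,
                             std::function<std::string(const Target &)> F) {
        AsmInfo[T.Name] = std::move(F);
      }
    };

    int main() {
      Registry TheRegistry;
      Target LE{"aarch64"}, BE{"aarch64_be"}, ARM64{"arm64"};

      // Same idea as the patch: one loop instead of three near-identical
      // call sites per MC component.
      for (Target *T : {&LE, &BE, &ARM64})
        TheRegistry.RegisterMCAsmInfo(
            *T, [](const Target &Tgt) { return "AsmInfo for " + Tgt.Name; });

      std::cout << TheRegistry.AsmInfo["arm64"](ARM64) << "\n";
    }

The asm-backend registration stays partly outside the loop in the patch because the big-endian target needs a different factory, which is why only the LE and ARM64 targets are grouped there.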
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 1553115..7ce303b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -28,8 +28,10 @@ class MCRegisterInfo;
class MCObjectWriter;
class MCStreamer;
class MCSubtargetInfo;
+class MCTargetStreamer;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
extern Target TheAArch64leTarget;
@@ -37,9 +39,8 @@ extern Target TheAArch64beTarget;
extern Target TheARM64Target;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
@@ -53,11 +54,14 @@ MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
uint32_t CPUSubtype);
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+
+MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
+
} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
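
The header now exposes target-streamer factories (createAArch64AsmTargetStreamer, createAArch64ObjectTargetStreamer) rather than a factory for a complete asm streamer. The sketch below is not part of the patch and uses placeholder Streamer/TargetStreamer/createDemoTargetStreamer names instead of the real MCStreamer/MCTargetStreamer classes; it only illustrates the shape of the split, where the generic streamer owns a small target hook that it defers to:

    #include <cstdint>
    #include <iostream>
    #include <memory>

    // Placeholder types; the real classes are llvm::MCStreamer and
    // llvm::MCTargetStreamer.
    struct TargetStreamer {
      virtual ~TargetStreamer() = default;
      virtual void emitInst(uint32_t Encoding) = 0;
    };

    struct Streamer {
      std::unique_ptr<TargetStreamer> TS; // generic code owns the target hook
      void EmitInstructionWord(uint32_t Encoding) {
        if (TS)
          TS->emitInst(Encoding); // target-specific output happens in the hook
      }
    };

    // Target-side implementation and factory, analogous in spirit to
    // createAArch64AsmTargetStreamer().
    struct DemoAArch64TargetStreamer : TargetStreamer {
      void emitInst(uint32_t Encoding) override {
        std::cout << "\t.inst\t0x" << std::hex << Encoding << "\n";
      }
    };

    std::unique_ptr<TargetStreamer> createDemoTargetStreamer() {
      return std::make_unique<DemoAArch64TargetStreamer>();
    }

    int main() {
      Streamer S;
      S.TS = createDemoTargetStreamer();
      S.EmitInstructionWord(0xd503201f); // AArch64 NOP encoding, for illustration
    }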
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index bc6c7a9..160c1c5 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -19,10 +19,10 @@
using namespace llvm;
StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Value == Value) {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].Value == Value) {
Valid = true;
- return Pairs[i].Name;
+ return Mappings[i].Name;
}
}
@@ -32,10 +32,10 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
std::string LowerCaseName = Name.lower();
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Name == LowerCaseName) {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].Name == LowerCaseName) {
Valid = true;
- return Pairs[i].Value;
+ return Mappings[i].Value;
}
}
@@ -47,7 +47,7 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const {
return Value < TooBigImm;
}
-const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = {
{"s1e1r", S1E1R},
{"s1e2r", S1E2R},
{"s1e3r", S1E3R},
@@ -63,9 +63,9 @@ const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = {
};
AArch64AT::ATMapper::ATMapper()
- : AArch64NamedImmMapper(ATPairs, 0) {}
+ : AArch64NamedImmMapper(ATMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = {
{"oshld", OSHLD},
{"oshst", OSHST},
{"osh", OSH},
@@ -81,9 +81,9 @@ const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[]
};
AArch64DB::DBarrierMapper::DBarrierMapper()
- : AArch64NamedImmMapper(DBarrierPairs, 16u) {}
+ : AArch64NamedImmMapper(DBarrierMappings, 16u) {}
-const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = {
{"zva", ZVA},
{"ivac", IVAC},
{"isw", ISW},
@@ -95,25 +95,25 @@ const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = {
};
AArch64DC::DCMapper::DCMapper()
- : AArch64NamedImmMapper(DCPairs, 0) {}
+ : AArch64NamedImmMapper(DCMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = {
{"ialluis", IALLUIS},
{"iallu", IALLU},
{"ivau", IVAU}
};
AArch64IC::ICMapper::ICMapper()
- : AArch64NamedImmMapper(ICPairs, 0) {}
+ : AArch64NamedImmMapper(ICMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = {
{"sy", SY},
};
AArch64ISB::ISBMapper::ISBMapper()
- : AArch64NamedImmMapper(ISBPairs, 16) {}
+ : AArch64NamedImmMapper(ISBMappings, 16) {}
-const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = {
{"pldl1keep", PLDL1KEEP},
{"pldl1strm", PLDL1STRM},
{"pldl2keep", PLDL2KEEP},
@@ -135,18 +135,18 @@ const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = {
};
AArch64PRFM::PRFMMapper::PRFMMapper()
- : AArch64NamedImmMapper(PRFMPairs, 32) {}
+ : AArch64NamedImmMapper(PRFMMappings, 32) {}
-const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = {
{"spsel", SPSel},
{"daifset", DAIFSet},
{"daifclr", DAIFClr}
};
AArch64PState::PStateMapper::PStateMapper()
- : AArch64NamedImmMapper(PStatePairs, 0) {}
+ : AArch64NamedImmMapper(PStateMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
{"mdccsr_el0", MDCCSR_EL0},
{"dbgdtrrx_el0", DBGDTRRX_EL0},
{"mdrar_el1", MDRAR_EL1},
@@ -247,11 +247,11 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = {
AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits)
: SysRegMapper(FeatureBits) {
- InstPairs = &MRSPairs[0];
- NumInstPairs = llvm::array_lengthof(MRSPairs);
+ InstMappings = &MRSMappings[0];
+ NumInstMappings = llvm::array_lengthof(MRSMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
{"dbgdtrtx_el0", DBGDTRTX_EL0},
{"oslar_el1", OSLAR_EL1},
{"pmswinc_el0", PMSWINC_EL0},
@@ -271,12 +271,12 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits)
: SysRegMapper(FeatureBits) {
- InstPairs = &MSRPairs[0];
- NumInstPairs = llvm::array_lengthof(MSRPairs);
+ InstMappings = &MSRMappings[0];
+ NumInstMappings = llvm::array_lengthof(MSRMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = {
{"osdtrrx_el1", OSDTRRX_EL1},
{"osdtrtx_el1", OSDTRTX_EL1},
{"teecr32_el1", TEECR32_EL1},
@@ -756,7 +756,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[]
};
const AArch64NamedImmMapper::Mapping
-AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = {
+AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = {
{"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3}
};
@@ -765,29 +765,29 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
std::string NameLower = Name.lower();
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].Name == NameLower) {
Valid = true;
- return SysRegPairs[i].Value;
+ return SysRegMappings[i].Value;
}
}
// Next search for target specific registers
if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
+ if (CycloneSysRegMappings[i].Name == NameLower) {
Valid = true;
- return CycloneSysRegPairs[i].Value;
+ return CycloneSysRegMappings[i].Value;
}
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].Name == NameLower) {
Valid = true;
- return InstPairs[i].Value;
+ return InstMappings[i].Value;
}
}
@@ -816,26 +816,26 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
std::string
AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Value == Bits) {
- return SysRegPairs[i].Name;
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].Value == Bits) {
+ return SysRegMappings[i].Name;
}
}
// Next search for target specific registers
if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Value == Bits) {
- return CycloneSysRegPairs[i].Name;
+ for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
+ if (CycloneSysRegMappings[i].Value == Bits) {
+ return CycloneSysRegMappings[i].Name;
}
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Value == Bits) {
- return InstPairs[i].Name;
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].Value == Bits) {
+ return InstMappings[i].Name;
}
}
@@ -850,7 +850,7 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+ "_c" + utostr(CRm) + "_" + utostr(Op2);
}
-const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = {
{"ipas2e1is", IPAS2E1IS},
{"ipas2le1is", IPAS2LE1IS},
{"vmalle1is", VMALLE1IS},
@@ -886,4 +886,4 @@ const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
};
AArch64TLBI::TLBIMapper::TLBIMapper()
- : AArch64NamedImmMapper(TLBIPairs, 0) {}
+ : AArch64NamedImmMapper(TLBIMappings, 0) {}
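
The renaming in this file is mechanical (Pairs becomes Mappings); the lookup logic remains a linear scan over name/value entries, as in toString() and fromString() above. A minimal self-contained version of that scan, not part of the patch and using an illustrative table rather than the real AT/DC/TLBI data, might look like:

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Simplified stand-in for AArch64NamedImmMapper. The real fromString()
    // lowercases the input name first; that step is omitted here.
    struct NamedImmMapper {
      struct Mapping { const char *Name; uint32_t Value; };

      const Mapping *Mappings;
      size_t NumMappings;

      std::string toString(uint32_t Value, bool &Valid) const {
        for (size_t i = 0; i < NumMappings; ++i)
          if (Mappings[i].Value == Value) { Valid = true; return Mappings[i].Name; }
        Valid = false;
        return "";
      }

      uint32_t fromString(const std::string &Name, bool &Valid) const {
        for (size_t i = 0; i < NumMappings; ++i)
          if (Name == Mappings[i].Name) { Valid = true; return Mappings[i].Value; }
        Valid = false;
        return 0;
      }
    };

    static const NamedImmMapper::Mapping DemoMappings[] = {
        {"spsel", 0x05}, {"daifset", 0x1e}, {"daifclr", 0x1f}};

    int main() {
      NamedImmMapper M{DemoMappings, 3};
      bool Valid;
      std::cout << M.fromString("daifset", Valid) << (Valid ? " ok" : " bad") << "\n";
      std::cout << M.toString(0x1f, Valid) << "\n";
    }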
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index c60b09a..2ae6f52 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -283,8 +283,8 @@ struct AArch64NamedImmMapper {
};
template<int N>
- AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
- : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+ AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm)
+ : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {}
StringRef toString(uint32_t Value, bool &Valid) const;
uint32_t fromString(StringRef Name, bool &Valid) const;
@@ -294,8 +294,8 @@ struct AArch64NamedImmMapper {
/// N being 0 indicates no immediate syntax-form is allowed.
bool validImm(uint32_t Value) const;
protected:
- const Mapping *Pairs;
- size_t NumPairs;
+ const Mapping *Mappings;
+ size_t NumMappings;
uint32_t TooBigImm;
};
@@ -317,7 +317,7 @@ namespace AArch64AT {
};
struct ATMapper : AArch64NamedImmMapper {
- const static Mapping ATPairs[];
+ const static Mapping ATMappings[];
ATMapper();
};
@@ -341,7 +341,7 @@ namespace AArch64DB {
};
struct DBarrierMapper : AArch64NamedImmMapper {
- const static Mapping DBarrierPairs[];
+ const static Mapping DBarrierMappings[];
DBarrierMapper();
};
@@ -361,7 +361,7 @@ namespace AArch64DC {
};
struct DCMapper : AArch64NamedImmMapper {
- const static Mapping DCPairs[];
+ const static Mapping DCMappings[];
DCMapper();
};
@@ -378,7 +378,7 @@ namespace AArch64IC {
struct ICMapper : AArch64NamedImmMapper {
- const static Mapping ICPairs[];
+ const static Mapping ICMappings[];
ICMapper();
};
@@ -394,7 +394,7 @@ namespace AArch64ISB {
SY = 0xf
};
struct ISBMapper : AArch64NamedImmMapper {
- const static Mapping ISBPairs[];
+ const static Mapping ISBMappings[];
ISBMapper();
};
@@ -424,7 +424,7 @@ namespace AArch64PRFM {
};
struct PRFMMapper : AArch64NamedImmMapper {
- const static Mapping PRFMPairs[];
+ const static Mapping PRFMMappings[];
PRFMMapper();
};
@@ -439,7 +439,7 @@ namespace AArch64PState {
};
struct PStateMapper : AArch64NamedImmMapper {
- const static Mapping PStatePairs[];
+ const static Mapping PStateMappings[];
PStateMapper();
};
@@ -1134,11 +1134,11 @@ namespace AArch64SysReg {
// burdening the common AArch64NamedImmMapper with abstractions only needed in
// this one case.
struct SysRegMapper {
- static const AArch64NamedImmMapper::Mapping SysRegPairs[];
- static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[];
+ static const AArch64NamedImmMapper::Mapping SysRegMappings[];
+ static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[];
- const AArch64NamedImmMapper::Mapping *InstPairs;
- size_t NumInstPairs;
+ const AArch64NamedImmMapper::Mapping *InstMappings;
+ size_t NumInstMappings;
uint64_t FeatureBits;
SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
@@ -1147,12 +1147,12 @@ namespace AArch64SysReg {
};
struct MSRMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MSRPairs[];
+ static const AArch64NamedImmMapper::Mapping MSRMappings[];
MSRMapper(uint64_t FeatureBits);
};
struct MRSMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MRSPairs[];
+ static const AArch64NamedImmMapper::Mapping MRSMappings[];
MRSMapper(uint64_t FeatureBits);
};
@@ -1197,7 +1197,7 @@ namespace AArch64TLBI {
};
struct TLBIMapper : AArch64NamedImmMapper {
- const static Mapping TLBIPairs[];
+ const static Mapping TLBIMappings[];
TLBIMapper();
};
@@ -1229,7 +1229,7 @@ namespace AArch64II {
MO_NO_FLAG,
- MO_FRAGMENT = 0x7,
+ MO_FRAGMENT = 0xf,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -1257,26 +1257,31 @@ namespace AArch64II {
/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G0 = 6,
+ /// MO_HI12 - This flag indicates that a symbol operand represents the bits
+ /// 13-24 of a 64-bit address, used in an arithmetic immediate-shifted-left-
+ /// by-12-bits instruction.
+ MO_HI12 = 7,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
- MO_GOT = 8,
+ MO_GOT = 0x10,
/// MO_NC - Indicates whether the linker is expected to check the symbol
/// reference for overflow. For example in an ADRP/ADD pair of relocations
/// the ADRP usually does check, but not the ADD.
- MO_NC = 0x10,
+ MO_NC = 0x20,
/// MO_TLS - Indicates that the operand being accessed is some kind of
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x20,
+ MO_TLS = 0x40,
/// MO_CONSTPOOL - This flag indicates that a symbol operand represents
/// the address of a constant pool entry for the symbol, rather than the
/// address of the symbol itself.
- MO_CONSTPOOL = 0x40
+ MO_CONSTPOOL = 0x80
};
} // end namespace AArch64II
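
The flag renumbering at the end widens MO_FRAGMENT from 0x7 to 0xf so the fragment field can hold the new MO_HI12 kind, which pushes each standalone flag (MO_GOT, MO_NC, MO_TLS, MO_CONSTPOOL) up by one bit. The short sketch below is not part of the patch; it restates only the values visible in this hunk and shows how a flags word decomposes under the new layout:

    #include <cstdio>

    // Illustrative re-statement of the renumbered AArch64II operand flags:
    // the low four bits hold the fragment selector (making room for
    // MO_HI12 = 7), and the standalone flags each occupy one higher bit.
    enum TargetFlags : unsigned {
      MO_NO_FLAG = 0,
      MO_FRAGMENT = 0xf,   // mask for the fragment kind (MO_PAGE ... MO_HI12)
      MO_HI12 = 7,         // new fragment kind introduced by the patch
      MO_GOT = 0x10,
      MO_NC = 0x20,
      MO_TLS = 0x40,
      MO_CONSTPOOL = 0x80,
    };

    int main() {
      // A hypothetical operand carrying the HI12 fragment plus the no-check flag.
      unsigned Flags = MO_HI12 | MO_NC;

      unsigned Fragment = Flags & MO_FRAGMENT; // isolates the fragment kind (7)
      bool NoCheck = (Flags & MO_NC) != 0;     // standalone flags still test cleanly

      std::printf("fragment=%u no-check=%d\n", Fragment, NoCheck);
      return 0;
    }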