Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r--  lib/Target/AArch64/AArch64.h | 3
-rw-r--r--  lib/Target/AArch64/AArch64.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64A53Fix835769.cpp | 13
-rw-r--r--  lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 40
-rw-r--r--  lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp | 8
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp | 40
-rw-r--r--  lib/Target/AArch64/AArch64BranchRelaxation.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64CallingConvention.h | 141
-rw-r--r--  lib/Target/AArch64/AArch64CallingConvention.td | 47
-rw-r--r--  lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp | 8
-rw-r--r--  lib/Target/AArch64/AArch64CollectLOH.cpp | 10
-rw-r--r--  lib/Target/AArch64/AArch64ConditionOptimizer.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64ConditionalCompares.cpp | 6
-rw-r--r--  lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp | 70
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 35
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 176
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 292
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 20
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 7
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 2
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 87
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 22
-rw-r--r--  lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 6
-rw-r--r--  lib/Target/AArch64/AArch64PromoteConstant.cpp | 152
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 20
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 9
-rw-r--r--  lib/Target/AArch64/AArch64StorePairSuppress.cpp | 19
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 15
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 6
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 56
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 6
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 315
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.h | 147
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 80
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 12
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h | 13
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 42
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 2
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 4
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 3
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 3
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 89
45 files changed, 1220 insertions(+), 822 deletions(-)
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index e96d18b..21106c9 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -40,9 +40,6 @@ FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64AddressTypePromotionPass();
FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
-/// \brief Creates an ARM-specific Target Transformation Info pass.
-ImmutablePass *
-createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index e6a27c3..dff48f9 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -91,6 +91,8 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
+// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
+def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index 852a635..dd401c6 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -16,8 +16,6 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -26,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -79,7 +78,7 @@ static bool isSecondInstructionInSequence(MachineInstr *MI) {
namespace {
class AArch64A53Fix835769 : public MachineFunctionPass {
- const AArch64InstrInfo *TII;
+ const TargetInstrInfo *TII;
public:
static char ID;
@@ -107,17 +106,13 @@ char AArch64A53Fix835769::ID = 0;
bool
AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) {
- const TargetMachine &TM = F.getTarget();
-
- bool Changed = false;
DEBUG(dbgs() << "***** AArch64A53Fix835769 *****\n");
-
- TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ bool Changed = false;
+ TII = F.getSubtarget().getInstrInfo();
for (auto &MBB : F) {
Changed |= runOnBasicBlock(MBB);
}
-
return Changed;
}
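
Reviewer note: the subtarget-access migration in this hunk recurs through the rest of the patch; passes now query the per-function subtarget instead of going through the TargetMachine. A minimal sketch of the new idiom follows (the pass name is illustrative only, not part of the patch):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

namespace {
// The subtarget is a property of the function being compiled, so TII is
// re-fetched in every runOnMachineFunction call rather than cached from the
// TargetMachine at pass construction time.
struct ExamplePass : MachineFunctionPass {
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    (void)TII; // ... build or inspect instructions via TII ...
    return false;
  }
};
char ExamplePass::ID = 0;
}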
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 2503764..2cf3c22 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -38,8 +38,8 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -96,6 +96,10 @@ static bool isMla(MachineInstr *MI) {
}
}
+namespace llvm {
+static void initializeAArch64A57FPLoadBalancingPass(PassRegistry &);
+}
+
//===----------------------------------------------------------------------===//
namespace {
@@ -109,14 +113,15 @@ static const char *ColorNames[2] = { "Even", "Odd" };
class Chain;
class AArch64A57FPLoadBalancing : public MachineFunctionPass {
- const AArch64InstrInfo *TII;
MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
RegisterClassInfo RCI;
public:
static char ID;
- explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {}
+ explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {
+ initializeAArch64A57FPLoadBalancingPass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &F) override;
@@ -143,8 +148,16 @@ private:
Color getColor(unsigned Register);
Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
};
+}
+
char AArch64A57FPLoadBalancing::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
+ "AArch64 A57 FP Load-Balancing", false, false)
+INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
+ "AArch64 A57 FP Load-Balancing", false, false)
+
+namespace {
/// A Chain is a sequence of instructions that are linked together by
/// an accumulation operand. For example:
///
@@ -259,7 +272,7 @@ public:
}
/// Return true if this chain starts before Other.
- bool startsBefore(Chain *Other) {
+ bool startsBefore(const Chain *Other) const {
return StartInstIdx < Other->StartInstIdx;
}
@@ -297,10 +310,8 @@ bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
bool Changed = false;
DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
- const TargetMachine &TM = F.getTarget();
MRI = &F.getRegInfo();
TRI = F.getRegInfo().getTargetRegisterInfo();
- TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
RCI.runOnMachineFunction(F);
for (auto &MBB : F) {
@@ -431,10 +442,17 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
// chains that we cannot change before we look at those we can,
// so the parity counter is updated and we know what color we should
// change them to!
+ // Final tie-break with instruction order so pass output is stable (i.e. not
+ // dependent on malloc'd pointer values).
std::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
if (G1->size() != G2->size())
return G1->size() > G2->size();
- return G1->requiresFixup() > G2->requiresFixup();
+ if (G1->requiresFixup() != G2->requiresFixup())
+ return G1->requiresFixup() > G2->requiresFixup();
+ // Make sure startsBefore() produces a stable final order.
+ assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
+ "Starts before not total order!");
+ return G1->startsBefore(G2);
});
Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
@@ -481,10 +499,16 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
RS.forward(I);
AvailableRegs &= RS.getRegsAvailable(TRI->getRegClass(RegClassID));
- // Remove any registers clobbered by a regmask.
+ // Remove any registers clobbered by a regmask or any def register that is
+ // immediately dead.
for (auto J : I->operands()) {
if (J.isRegMask())
AvailableRegs.clearBitsNotInMask(J.getRegMask());
+
+ if (J.isReg() && J.isDef() && AvailableRegs[J.getReg()]) {
+ assert(J.isDead() && "Non-dead def should have been removed by now!");
+ AvailableRegs.reset(J.getReg());
+ }
}
}
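
Reviewer note: the comparator change above is the standard fix for nondeterministic std::sort output. When the primary keys tie, fall back to a key that totally orders the inputs (instruction index) instead of leaving equal elements at the mercy of pointer values. A self-contained sketch of the pattern, using stand-in fields rather than the pass's real Chain class:

struct ChainKey {
  unsigned Size;     // primary key
  bool NeedsFixup;   // secondary key
  unsigned StartIdx; // unique per chain: supplies a total order
};

// Deterministic comparator: without the StartIdx tie-break, the relative
// order of equal elements would depend on heap addresses and on the
// std::sort implementation, so pass output could vary run to run.
static bool chainLess(const ChainKey *A, const ChainKey *B) {
  if (A->Size != B->Size)
    return A->Size > B->Size;
  if (A->NeedsFixup != B->NeedsFixup)
    return A->NeedsFixup > B->NeedsFixup;
  return A->StartIdx < B->StartIdx;
}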
diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 5afe0f4..f27dfc9 100644
--- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -64,7 +64,7 @@ STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
namespace {
class AArch64AdvSIMDScalar : public MachineFunctionPass {
MachineRegisterInfo *MRI;
- const AArch64InstrInfo *TII;
+ const TargetInstrInfo *TII;
private:
// isProfitableToTransform - Predicate function to determine whether an
@@ -268,7 +268,7 @@ AArch64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
return TransformAll;
}
-static MachineInstr *insertCopy(const AArch64InstrInfo *TII, MachineInstr *MI,
+static MachineInstr *insertCopy(const TargetInstrInfo *TII, MachineInstr *MI,
unsigned Dst, unsigned Src, bool IsKill) {
MachineInstrBuilder MIB =
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AArch64::COPY),
@@ -376,10 +376,8 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
bool Changed = false;
DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n");
- const TargetMachine &TM = mf.getTarget();
MRI = &mf.getRegInfo();
- TII = static_cast<const AArch64InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ TII = mf.getSubtarget().getInstrInfo();
// Just check things on a one-block-at-a-time basis.
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 8bee4f5..d64d851 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -43,19 +43,13 @@ using namespace llvm;
namespace {
class AArch64AsmPrinter : public AsmPrinter {
- /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
- /// make the right decision when printing asm code for different targets.
- const AArch64Subtarget *Subtarget;
-
AArch64MCInstLower MCInstLowering;
StackMaps SM;
public:
- AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
- MCInstLowering(OutContext, *this), SM(*this), AArch64FI(nullptr),
- LOHLabelCounter(0) {}
+ AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
+ SM(*this), AArch64FI(nullptr), LOHLabelCounter(0) {}
const char *getPassName() const override {
return "AArch64 Assembly Printer";
@@ -124,7 +118,8 @@ private:
//===----------------------------------------------------------------------===//
void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetMachO()) {
+ Triple TT(TM.getTargetTriple());
+ if (TT.isOSBinFormatMachO()) {
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
// implementation of multiple entry points). If this doesn't occur, the
@@ -135,7 +130,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
// Emit a .data.rel section containing any stubs that were created.
- if (Subtarget->isTargetELF()) {
+ if (TT.isOSBinFormatELF()) {
const TargetLoweringObjectFileELF &TLOFELF =
static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
@@ -145,7 +140,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
@@ -252,8 +247,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterClass *RC,
bool isVector, raw_ostream &O) {
assert(MO.isReg() && "Should only get here with a register!");
- const AArch64RegisterInfo *RI = static_cast<const AArch64RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const AArch64RegisterInfo *RI =
+ MF->getSubtarget<AArch64Subtarget>().getRegisterInfo();
unsigned Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
assert(RI->regsOverlap(RegToPrint, Reg));
@@ -381,8 +376,23 @@ void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
unsigned NumNOPBytes = MI.getOperand(1).getImm();
SM.recordStackMap(MI);
- // Emit padding.
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+
+ // Scan ahead to trim the shadow.
+ const MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::const_iterator MII(MI);
+ ++MII;
+ while (NumNOPBytes > 0) {
+ if (MII == MBB.end() || MII->isCall() ||
+ MII->getOpcode() == AArch64::DBG_VALUE ||
+ MII->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MII->getOpcode() == TargetOpcode::STACKMAP)
+ break;
+ ++MII;
+ NumNOPBytes -= 4;
+ }
+
+ // Emit nops.
for (unsigned i = 0; i < NumNOPBytes; i += 4)
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
}
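
Reviewer note: the shadow trimming above relies on every AArch64 instruction being exactly 4 bytes, so each following non-call instruction already covers 4 bytes of the requested shadow and only the remainder is padded with HINT #0 (NOP). A hedged arithmetic sketch of the accounting (stopping conditions such as calls and patchpoints are omitted here):

#include <cassert>

// Illustrative only: how many NOPs remain for a stackmap shadow of
// NumNOPBytes when FollowingInsns ordinary instructions trail it in the
// same basic block.
static unsigned nopsToEmit(unsigned NumNOPBytes, unsigned FollowingInsns) {
  while (NumNOPBytes > 0 && FollowingInsns-- > 0)
    NumNOPBytes -= 4; // each trailing instruction covers 4 shadow bytes
  return NumNOPBytes / 4;
}

int main() {
  assert(nopsToEmit(16, 0) == 4); // nothing follows: pad all 16 bytes
  assert(nopsToEmit(16, 2) == 2); // two instructions cover 8 of 16 bytes
  assert(nopsToEmit(8, 5) == 0);  // shadow fully covered, no padding
  return 0;
}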
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index e2b6367..d973234 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -476,9 +476,7 @@ bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
- TII = (const AArch64InstrInfo *)MF->getTarget()
- .getSubtargetImpl()
- ->getInstrInfo();
+ TII = (const AArch64InstrInfo *)MF->getSubtarget().getInstrInfo();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
new file mode 100644
index 0000000..1e2d1c3
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -0,0 +1,141 @@
+//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the AArch64 Calling Convention
+// that aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace {
+using namespace llvm;
+
+static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7};
+static const uint16_t HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
+ AArch64::H3, AArch64::H4, AArch64::H5,
+ AArch64::H6, AArch64::H7};
+static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
+ AArch64::S3, AArch64::S4, AArch64::S5,
+ AArch64::S6, AArch64::S7};
+static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
+ AArch64::D3, AArch64::D4, AArch64::D5,
+ AArch64::D6, AArch64::D7};
+static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+ AArch64::Q3, AArch64::Q4, AArch64::Q5,
+ AArch64::Q6, AArch64::Q7};
+
+static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
+ MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
+ CCState &State, unsigned SlotAlign) {
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ unsigned StackAlign = State.getMachineFunction()
+ .getTarget()
+ .getDataLayout()
+ ->getStackAlignment();
+ unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
+
+ for (auto &It : PendingMembers) {
+ It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
+ State.addLoc(It);
+ SlotAlign = 1;
+ }
+
+ // All pending members have now been allocated
+ PendingMembers.clear();
+ return true;
+}
+
+/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
+/// [N x Ty] type must still be contiguous in memory though.
+static bool CC_AArch64_Custom_Stack_Block(
+ unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
+}
+
+/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
+/// registers. If no such sequence is available, mark the rest of the registers
+/// of that type as used and place the argument on the stack.
+static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ ArrayRef<uint16_t> RegList;
+ if (LocVT.SimpleTy == MVT::i64)
+ RegList = XRegList;
+ else if (LocVT.SimpleTy == MVT::f16)
+ RegList = HRegList;
+ else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
+ RegList = SRegList;
+ else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
+ RegList = DRegList;
+ else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
+ RegList = QRegList;
+ else {
+ // Not an array we want to split up after all.
+ return false;
+ }
+
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+ if (RegResult) {
+ for (auto &It : PendingMembers) {
+ It.convertToReg(RegResult);
+ State.addLoc(It);
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
+ }
+
+ // Mark all regs in the class as unavailable
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
+
+ const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+ State.getMachineFunction().getSubtarget());
+ unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
+}
+
+}
+
+#endif
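
Reviewer note: as an illustration of the block rules in this new file, a C-level homogeneous aggregate lowers to an [N x Ty] block, and CC_AArch64_Custom_Block either finds N consecutive registers of the right class or marks the whole class used and spills the block. The register assignment in the comments below is the expected AAPCS64 behaviour, sketched by hand rather than taken from a test in this patch:

// Hypothetical example of a block argument at the source level.
struct HFA {
  double a, b, c, d; // lowers to a block of four f64 members
};

// With D0-D7 free, h arrives in D0, D1, D2, D3 (consecutive registers,
// one member each). If no run of four consecutive D registers is free,
// every D register is marked used and the whole block is passed on the
// stack instead, keeping the members contiguous in memory.
double sum(HFA h);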
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 9e707e4..4691e94 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -16,7 +16,7 @@ class CCIfAlign<string Align, CCAction A> :
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
/// CCIfBigEndian - Match only if we're in big endian mode.
class CCIfBigEndian<CCAction A> :
- CCIf<"State.getMachineFunction().getSubtarget().getDataLayout()->isBigEndian()", A>;
+ CCIf<"State.getMachineFunction().getTarget().getDataLayout()->isBigEndian()", A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
@@ -40,6 +40,8 @@ def CC_AArch64_AAPCS : CallingConv<[
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -119,6 +121,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -159,6 +163,8 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Stack_Block">>,
+
// Handle all scalar types as either i64 or f64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
CCIfType<[f16, f32], CCPromoteToType<f64>>,
@@ -198,6 +204,44 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;
+//===----------------------------------------------------------------------===//
+// ARM64 Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+// This calling convention is specific to the Glasgow Haskell Compiler.
+// The only documentation is the GHC source code, specifically the C header
+// file:
+//
+// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h
+//
+// which defines the registers for the Spineless Tagless G-Machine (STG) that
+// GHC uses to implement lazy evaluation. The generic STG machine has a set of
+// registers which are mapped to appropriate set of architecture specific
+// registers for each CPU architecture.
+//
+// The STG Machine is documented here:
+//
+// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode
+//
+// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
+// register mapping".
+
+def CC_AArch64_GHC : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+ CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
+ CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,
+
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+ CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
+]>;
+
// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
// presumably a callee to someone. External functions may not do so, but this
// is currently safe since BL has LR as an implicit-def and what happens after a
@@ -243,3 +287,4 @@ def CSR_AArch64_AllRegs
(sequence "S%u", 0, 31), (sequence "D%u", 0, 31),
(sequence "Q%u", 0, 31))>;
+def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index aab8e38..3b74481 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -92,9 +92,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
unsigned TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const AArch64TargetMachine *TM =
- static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
// code sequence assumes the address will be.
@@ -112,9 +110,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const AArch64TargetMachine *TM =
- static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 87b545b..938dcb3 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -285,9 +285,7 @@ static void initReachingDef(MachineFunction &MF,
BlockToSetOfInstrsPerColor &ReachableUses,
const MapRegToId &RegToId,
const MachineInstr *DummyOp, bool ADRPMode) {
- const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
-
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned NbReg = RegToId.size();
for (MachineBasicBlock &MBB : MF) {
@@ -1026,8 +1024,7 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
}
bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
MapRegToId RegToId;
@@ -1043,8 +1040,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *DummyOp = nullptr;
if (BasicBlockScopeOnly) {
- const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
// For local analysis, create a dummy operation to record uses that are not
// local.
DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 0fbd3c6..e68571f 100644
--- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -304,7 +304,7 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
- TII = MF.getTarget().getSubtargetImpl()->getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
bool Changed = false;
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 54f53dc..fccd8df 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -893,15 +893,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: " << MF.getName() << '\n');
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
- SchedModel =
- MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
+ SchedModel = MF.getSubtarget().getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- MinSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ MinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
bool Changed = false;
CmpConv.runOnMachineFunction(MF);
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index c850680..41b1132 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -229,7 +229,7 @@ static bool isStartChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
- return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
+ return isMask_64(~Chunk);
}
/// \brief Check whether this chunk matches the pattern '0...1...' This pattern
@@ -239,7 +239,7 @@ static bool isEndChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
- return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
+ return isMask_64(Chunk);
}
/// \brief Clear or set all bits in the chunk at the given index.
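
Reviewer note: the isMask_64 rewrite above uses a complement identity. A '1...0...' start chunk is exactly the bitwise NOT of a '0...1...' mask, and an end chunk is itself a mask. A small standalone check, assuming isMask_64 keeps its usual LLVM meaning (true iff the value is 2^n - 1 for some n >= 1):

#include <cassert>
#include <cstdint>

// Same semantics as llvm::isMask_64: value is 0...01...1, at least one bit.
static bool isMask64(uint64_t Value) {
  return Value && ((Value + 1) & Value) == 0;
}

int main() {
  // A start chunk '1...0...' is the complement of a mask:
  assert(isMask64(~UINT64_C(0xFFFF000000000000)));
  // An end chunk '0...1...' is itself a mask:
  assert(isMask64(UINT64_C(0x000000000000FFFF)));
  // Mixed bit patterns are neither:
  assert(!isMask64(UINT64_C(0x00FF00FF00FF00FF)));
  return 0;
}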
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 612cb00..61017c1 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "AArch64CallingConvention.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -244,9 +245,10 @@ public:
unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
- const TargetLibraryInfo *LibInfo)
+ const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ Subtarget =
+ &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
Context = &FuncInfo.Fn->getContext();
}
@@ -301,6 +303,8 @@ static unsigned getImplicitScaleFactor(MVT VT) {
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
if (CC == CallingConv::WebKit_JS)
return CC_AArch64_WebKit_JS;
+ if (CC == CallingConv::GHC)
+ return CC_AArch64_GHC;
return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}
@@ -366,6 +370,24 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
+ // For the MachO large code model materialize the FP constant in code.
+ if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+ unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
+ const TargetRegisterClass *RC = Is64Bit ?
+ &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ unsigned TmpReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
+ .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(TmpReg, getKillRegState(true));
+
+ return ResultReg;
+ }
+
// Materialize via constant pool. MachineConstantPool wants an explicit
// alignment.
unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
@@ -752,7 +774,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
if (Addr.getOffsetReg())
break;
- if (DL.getTypeSizeInBits(Ty) != 8)
+ if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
break;
const Value *LHS = U->getOperand(0);
@@ -2112,15 +2134,15 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
int TestBit = -1;
bool IsCmpNE;
- if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
- if (const auto *C = dyn_cast<Constant>(LHS))
- if (C->isNullValue())
- std::swap(LHS, RHS);
-
- if (!isa<Constant>(RHS))
- return false;
+ switch (Predicate) {
+ default:
+ return false;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_NE:
+ if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
+ std::swap(LHS, RHS);
- if (!cast<Constant>(RHS)->isNullValue())
+ if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
return false;
if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
@@ -2143,26 +2165,27 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
TestBit = 0;
IsCmpNE = Predicate == CmpInst::ICMP_NE;
- } else if (Predicate == CmpInst::ICMP_SLT) {
- if (!isa<Constant>(RHS))
- return false;
-
- if (!cast<Constant>(RHS)->isNullValue())
+ break;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SGE:
+ if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
return false;
TestBit = BW - 1;
- IsCmpNE = true;
- } else if (Predicate == CmpInst::ICMP_SGT) {
+ IsCmpNE = Predicate == CmpInst::ICMP_SLT;
+ break;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SLE:
if (!isa<ConstantInt>(RHS))
return false;
- if (cast<ConstantInt>(RHS)->getValue() != -1)
+ if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
return false;
TestBit = BW - 1;
- IsCmpNE = false;
- } else
- return false;
+ IsCmpNE = Predicate == CmpInst::ICMP_SLE;
+ break;
+ } // end switch
static const unsigned OpcTable[2][2][2] = {
{ {AArch64::CBZW, AArch64::CBZX },
@@ -3302,8 +3325,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
MFI->setFrameAddressIsTaken(true);
const AArch64RegisterInfo *RegInfo =
- static_cast<const AArch64RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
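
Reviewer note: the compare-and-branch switch rewrite in this file generalizes the sign-bit trick. Against the constants 0 and -1, all four signed orderings reduce to testing bit BW-1, which is what lets FastISel emit a single-bit test branch (TBZ/TBNZ-style) instead of a compare plus conditional branch. A standalone check of the identity for 32 bits:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Comparing a signed value against 0 or -1 reduces to its sign bit.
static bool signBit(int32_t X) { return static_cast<uint32_t>(X) >> 31; }

int main() {
  for (int32_t X : {INT32_MIN, -2, -1, 0, 1, INT32_MAX}) {
    assert((X < 0) == signBit(X));    // ICMP_SLT 0
    assert((X >= 0) == !signBit(X));  // ICMP_SGE 0
    assert((X > -1) == !signBit(X));  // ICMP_SGT -1
    assert((X <= -1) == signBit(X));  // ICMP_SLE -1
  }
  return 0;
}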
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index a7779d6..84bf317 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -64,8 +64,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
return false;
// Don't use the red zone if the function explicitly asks us not to.
// This is typically used for kernel code.
- if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoRedZone))
+ if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
return false;
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -167,7 +166,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
if (CSI.empty())
return;
- const DataLayout *TD = MF.getSubtarget().getDataLayout();
+ const DataLayout *TD = MF.getTarget().getDataLayout();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
@@ -196,7 +195,8 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, DwarfReg, Offset - TotalSkipped));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -214,6 +214,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
DebugLoc DL = MBB.findDebugLoc(MBBI);
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
int NumBytes = (int)MFI->getStackSize();
if (!AFI->hasStackFrame()) {
assert(!HasFP && "unexpected function without stack frame but with FP");
@@ -234,7 +239,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
} else if (NumBytes) {
++NumRedZoneFunctions;
}
@@ -301,7 +307,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
if (needsFrameMoves) {
- const DataLayout *TD = MF.getSubtarget().getDataLayout();
+ const DataLayout *TD = MF.getTarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -377,26 +383,30 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
// Record the location of the stored LR
unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
// Record the location of the stored FP
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
} else {
// Encode the stack size of the leaf function.
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
// Now emit the moves for whatever callee saved regs we have.
@@ -445,6 +455,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = MFI->getStackSize();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
// Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = 0;
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 87a6d80..ac11c4d 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -53,12 +53,10 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- AttributeSet FnAttrs = MF.getFunction()->getAttributes();
ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
+ MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -134,8 +132,8 @@ public:
/// Generic helper for the createDTuple/createQTuple
/// functions. Those should almost always be called instead.
- SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
- unsigned SubRegs[]);
+ SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
+ const unsigned SubRegs[]);
SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
@@ -569,6 +567,27 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
return isWorthFolding(N);
}
+/// If there's a use of this ADDlow that's not itself a load/store then we'll
+/// need to create a real ADD instruction from it anyway and there's no point in
+/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
+/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
+/// leads to duplicated ADRP instructions.
+static bool isWorthFoldingADDlow(SDValue N) {
+ for (auto Use : N->uses()) {
+ if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
+ Use->getOpcode() != ISD::ATOMIC_LOAD &&
+ Use->getOpcode() != ISD::ATOMIC_STORE)
+ return false;
+
+ // ldar and stlr have much more restrictive addressing modes (just a
+ // register).
+ if (cast<MemSDNode>(Use)->getOrdering() > Monotonic)
+ return false;
+ }
+
+ return true;
+}
+
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
@@ -582,7 +601,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
return true;
}
- if (N.getOpcode() == AArch64ISD::ADDlow) {
+ if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
GlobalAddressSDNode *GAN =
dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
Base = N.getOperand(0);
@@ -594,7 +613,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
unsigned Alignment = GV->getAlignment();
const DataLayout *DL = TLI->getDataLayout();
Type *Ty = GV->getType()->getElementType();
- if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin())
+ if (Alignment == 0 && Ty->isSized())
Alignment = DL->getABITypeAlignment(Ty);
if (Alignment >= Size)
@@ -869,26 +888,26 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
}
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = {
+ static const unsigned RegClassIDs[] = {
AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
- static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
- AArch64::dsub2, AArch64::dsub3 };
+ static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2, AArch64::dsub3};
return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = {
+ static const unsigned RegClassIDs[] = {
AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
- static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2, AArch64::qsub3 };
+ static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3};
return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
- unsigned RegClassIDs[],
- unsigned SubRegs[]) {
+ const unsigned RegClassIDs[],
+ const unsigned SubRegs[]) {
// There's no special register-class for a vector-list of 1 element: it's just
// a vector.
if (Regs.size() == 1)
@@ -1033,13 +1052,10 @@ SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(2)); // Mem operand;
- Ops.push_back(Chain);
+ SDValue Ops[] = {N->getOperand(2), // Mem operand;
+ Chain};
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ EVT ResTys[] = {MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
@@ -1057,15 +1073,12 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(1)); // Mem operand
- Ops.push_back(N->getOperand(2)); // Incremental
- Ops.push_back(Chain);
+ SDValue Ops[] = {N->getOperand(1), // Mem operand
+ N->getOperand(2), // Incremental
+ Chain};
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
@@ -1096,10 +1109,7 @@ SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + 2));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
return St;
@@ -1109,20 +1119,18 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
EVT VT = N->getOperand(2)->getValueType(0);
- SmallVector<EVT, 2> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Other); // Type for the Chain
+ EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other}; // Type for the Chain
// Form a REG_SEQUENCE to force register allocation.
bool Is128Bit = VT.getSizeInBits() == 128;
SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + 1)); // base register
- Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
- Ops.push_back(N->getOperand(0)); // Chain
+ SDValue Ops[] = {RegSeq,
+ N->getOperand(NumVecs + 1), // base register
+ N->getOperand(NumVecs + 2), // Incremental
+ N->getOperand(0)}; // Chain
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
return St;
@@ -1176,18 +1184,13 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ EVT ResTys[] = {MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64),
+ N->getOperand(NumVecs + 3), N->getOperand(0)};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
@@ -1221,20 +1224,17 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
- Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
- Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq,
+ CurDAG->getTargetConstant(LaneNo, MVT::i64), // Lane Number
+ N->getOperand(NumVecs + 2), // Base register
+ N->getOperand(NumVecs + 3), // Incremental
+ N->getOperand(0)};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Update uses of the write back register
@@ -1282,11 +1282,8 @@ SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64),
+ N->getOperand(NumVecs + 3), N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
// Transfer memoperands.
@@ -1312,19 +1309,16 @@ SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- SmallVector<EVT, 2> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Other);
+ EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
- Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, MVT::i64),
+ N->getOperand(NumVecs + 2), // Base Register
+ N->getOperand(NumVecs + 3), // Incremental
+ N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
@@ -1403,12 +1397,17 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
} else
return false;
- assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
- "bad amount in shift node!");
+ // Bail out on large immediates. This happens when no proper
+ // combining/constant folding was performed.
+ if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) {
+ DEBUG((dbgs() << N
+ << ": Found large shift immediate, this should not happen\n"));
+ return false;
+ }
LSB = Srl_imm;
- MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
- : CountTrailingOnes_64(And_imm)) -
+ MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm)
+ : countTrailingOnes<uint64_t>(And_imm)) -
1;
if (ClampMSB)
// Since we're moving the extend before the right shift operation, we need
@@ -1452,7 +1451,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
return false;
// Check whether we really have several bits extract here.
- unsigned BitWide = 64 - CountLeadingOnes_64(~(And_mask >> Srl_imm));
+ unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm));
if (BitWide && isMask_64(And_mask >> Srl_imm)) {
if (N->getValueType(0) == MVT::i32)
Opc = AArch64::UBFMWri;
@@ -1508,7 +1507,14 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
} else
return false;
- assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
+ // Missing combines/constant folding may have left us with strange
+ // constants.
+ if (Shl_imm >= VT.getSizeInBits()) {
+ DEBUG((dbgs() << N
+ << ": Found large shift immediate, this should not happen\n"));
+ return false;
+ }
+
uint64_t Srl_imm = 0;
if (!isIntImmediate(N->getOperand(1), Srl_imm))
return false;
@@ -1851,7 +1857,7 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
return false;
ShiftAmount = countTrailingZeros(NonZeroBits);
- MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount);
+ MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
@@ -2229,11 +2235,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
SDValue MemAddr = Node->getOperand(4);
// Place arguments in the right order.
- SmallVector<SDValue, 7> Ops;
- Ops.push_back(ValLo);
- Ops.push_back(ValHi);
- Ops.push_back(MemAddr);
- Ops.push_back(Chain);
+ SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
// Transfer memoperands.
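
Reviewer note: isWorthFoldingADDlow, added earlier in this file, is a code-size heuristic. If any user of the ADDlow is not a plain load/store, a real ADD must be materialized anyway, and folding the low bits into the memory ops only duplicates the ADRP. A hand-written before/after sketch (illustrative assembly in comments, not compiler output):

// Hypothetical mixed-use pattern: one load of g plus one address-taken use.
extern int g;
int loadAndTake(int **Addr) {
  *Addr = &g; // address use forces a materialized ADRP/ADD pair
  return g;   // load use
}
// Over-aggressive folding duplicates the page address:
//   adrp x8, g
//   add  x8, x8, :lo12:g      // the ADD is needed for &g anyway
//   str  x8, [x0]
//   adrp x9, g
//   ldr  w0, [x9, :lo12:g]    // ADDlow folded into the load
// With the check, one ADRP/ADD pair is shared:
//   adrp x8, g
//   add  x8, x8, :lo12:g
//   str  x8, [x0]
//   ldr  w0, [x8]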
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7c94d83..a1b324e 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
+#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64Subtarget.h"
@@ -66,10 +67,9 @@ EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
cl::desc("Allow AArch64 SLI/SRI formation"),
cl::init(false));
-
-AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
- : TargetLowering(TM) {
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
+ const AArch64Subtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
@@ -111,7 +111,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
}
// Compute derived properties from the register classes
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -386,13 +386,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
+ // Make floating-point constants legal for the large code model, so they don't
+ // become loads from the constant pool.
+ if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ }
+
// AArch64 does not have floating-point extending loads, i1 sign-extending
// load, floating-point truncating stores, or v2i32->v2i16 truncating store.
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
+ }
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
+
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
@@ -531,26 +542,22 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
- Expand);
-
- setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
-
- setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
-
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction((MVT::SimpleValueType)VT,
- (MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
}
// AArch64 has implementations of a lot of rounding-like FP operations.
@@ -615,7 +622,8 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
- setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+ for (MVT InnerVT : MVT::all_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, InnerVT, VT.getSimpleVT(), Expand);
// CNT supports only B element sizes.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
@@ -722,13 +730,6 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
return MVT::i64;
}
-unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
- // FIXME: On AArch64, this depends on the type.
- // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
- // and the offset has to be a multiple of the related size in bytes.
- return 4095;
-}
-
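The deleted getMaximalGlobalOffset hook returned a flat 4095, which, as its own FIXME conceded, is only right for byte-sized accesses: the unsigned-offset LDR/STR forms encode a 12-bit immediate scaled by the access size. A minimal standalone sketch of that arithmetic (plain C++, not LLVM API):

    #include <cstdint>
    #include <cstdio>

    // Reachable byte offsets for the unsigned-offset addressing mode: a
    // 12-bit field scaled by the access size, so the limit is type-dependent
    // and the offset must be a multiple of the access size.
    uint64_t maxScaledOffset(uint64_t AccessBytes) {
      return 4095 * AccessBytes;
    }

    int main() {
      const uint64_t Sizes[] = {1, 2, 4, 8, 16};
      for (uint64_t S : Sizes)
        std::printf("%llu-byte access: offsets up to %llu\n",
                    (unsigned long long)S,
                    (unsigned long long)maxScaledOffset(S));
      return 0;
    }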
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
@@ -869,9 +870,8 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI->getDebugLoc();
MachineFunction::iterator It = MBB;
@@ -1330,10 +1330,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
SDLoc(Op)).first;
}
@@ -1561,10 +1558,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
SDLoc(Op)).first;
}
@@ -1981,6 +1975,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
llvm_unreachable("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
+ case CallingConv::GHC:
+ return CC_AArch64_GHC;
case CallingConv::C:
case CallingConv::Fast:
if (!Subtarget->isTargetDarwin())
@@ -2012,18 +2008,19 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
- std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[i].OrigArgIndex;
-
- // Get type of the original argument.
- EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
- MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
- // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
- if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
- ValVT = MVT::i8;
- else if (ActualMVT == MVT::i16)
- ValVT = MVT::i16;
+ if (Ins[i].isOrigArg()) {
+ std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[i].getOrigArgIndex();
+ // Get type of the original argument.
+ EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
+ MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
+ // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+ if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+ ValVT = MVT::i8;
+ else if (ActualMVT == MVT::i16)
+ ValVT = MVT::i16;
+ }
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
@@ -2106,7 +2103,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
uint32_t BEAlign = 0;
- if (ArgSize < 8 && !Subtarget->isLittleEndian())
+ if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
+ !Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
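For readers unfamiliar with the big-endian adjustment being extended here, a sketch of the padding rule, assuming AAPCS64 8-byte stack slots. Pieces of a consecutive-register aggregate are laid out as one block, which is why the new isInConsecutiveRegs check exempts them.

    #include <cstdint>

    // On big-endian AArch64 a small argument occupies the high-addressed
    // bytes of its 8-byte slot, so its load offset is padded by the unused
    // low bytes; consecutive-register pieces must not be re-padded.
    uint32_t beAlign(bool LittleEndian, bool InConsecutiveRegs,
                     uint32_t ArgSizeBytes) {
      if (!LittleEndian && !InConsecutiveRegs && ArgSizeBytes < 8)
        return 8 - ArgSizeBytes;
      return 0;
    }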
@@ -2198,8 +2196,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
- unsigned FirstVariadicGPR =
- CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs);
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
@@ -2227,8 +2224,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
- unsigned FirstVariadicFPR =
- CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
@@ -2349,7 +2345,9 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- if (GV->hasExternalWeakLinkage())
+ const Triple TT(getTargetMachine().getTargetTriple());
+ if (GV->hasExternalWeakLinkage() &&
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
@@ -2660,7 +2658,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
: VA.getValVT().getSizeInBits();
OpSize = (OpSize + 7) / 8;
- if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+ if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
+ !Flags.isInConsecutiveRegs()) {
if (OpSize < 8)
BEAlign = 8 - OpSize;
}
@@ -2782,19 +2781,16 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
+ Mask = TRI->getThisReturnPreservedMask(CallConv);
if (!Mask) {
IsThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(CallConv);
}
} else
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3014,11 +3010,8 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
+ const uint32_t *Mask =
+ Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
@@ -3065,11 +3058,8 @@ SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
+ const uint32_t *Mask =
+ Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
// The function takes only one argument: the address of the descriptor itself
// in X0.
@@ -3259,8 +3249,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
OFCC = getInvertedCondCode(OFCC);
SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
- return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest,
- CCVal, Overflow);
+ return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Overflow);
}
if (LHS.getValueType().isInteger()) {
@@ -3429,8 +3419,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
}
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
- if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoImplicitFloat))
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
@@ -3447,18 +3437,12 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
- SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8);
- SDValue VecVal;
- if (VT == MVT::i32) {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
- VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec,
- VecVal);
- } else {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- }
+ if (VT == MVT::i32)
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal);
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop);
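A scalar model of the simplified lowering above, for orientation: the i32 input is zero-extended to i64, reinterpreted as eight byte lanes (the v8i8 bitcast), each lane is popcounted (CNT .8b), and the lanes are summed (UADDLV). The builtin below merely stands in for CNT; this is illustrative, not the backend code path.

    #include <cstdint>

    uint32_t ctpopViaByteLanes(uint64_t V) {
      uint32_t Sum = 0;
      for (int Lane = 0; Lane < 8; ++Lane)  // one iteration per v8i8 lane
        Sum += (uint32_t)__builtin_popcount((unsigned)((V >> (8 * Lane)) & 0xff));
      return Sum;                           // the UADDLV reduction
    }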
@@ -4279,7 +4263,8 @@ AArch64TargetLowering::getSingleConstraintMatchWeight(
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
- const std::string &Constraint, MVT VT) const {
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
@@ -4308,7 +4293,7 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
- Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
@@ -4615,19 +4600,21 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getIntPtrConstant(NumSrcElts));
+ DAG.getConstant(NumSrcElts, MVT::i64));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
- Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
- Src.ShuffleVec, DAG.getIntPtrConstant(0));
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, MVT::i64));
} else {
// An actual VEXT is needed
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
- Src.ShuffleVec, DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc1 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, MVT::i64));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getIntPtrConstant(NumSrcElts));
+ DAG.getConstant(NumSrcElts, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
@@ -6270,6 +6257,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode CC, bool NoNans, EVT VT,
SDLoc dl, SelectionDAG &DAG) {
EVT SrcVT = LHS.getValueType();
+ assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ "function only supposed to emit natural comparisons");
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
@@ -6364,13 +6353,15 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
+ EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
SDLoc dl(Op);
if (LHS.getValueType().getVectorElementType().isInteger()) {
assert(LHS.getValueType() == RHS.getValueType());
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
- return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(),
- dl, DAG);
+ SDValue Cmp =
+ EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
+ return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
@@ -6384,19 +6375,21 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
SDValue Cmp =
- EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG);
+ EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
return SDValue();
if (CC2 != AArch64CC::AL) {
SDValue Cmp2 =
- EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG);
+ EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
if (!Cmp2.getNode())
return SDValue();
- Cmp = DAG.getNode(ISD::OR, dl, Cmp.getValueType(), Cmp, Cmp2);
+ Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
}
+ Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
+
if (ShouldInvert)
return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
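The shape of the fix, in a scalar sketch: a NEON compare produces a mask whose lanes are as wide as the compared elements, while the setcc result type may differ, so the comparison is emitted at its natural width and then sign-extended or truncated. Illustrative only:

    #include <cstdint>

    // FCMEQ-style lane: the mask lane is as wide as the f32 input.
    int32_t cmpLaneNatural(float A, float B) { return A == B ? -1 : 0; }

    // The setcc result lane may be a different width; reshape afterwards.
    int16_t setccLane(float A, float B) { return (int16_t)cmpLaneNatural(A, B); }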
@@ -6534,6 +6527,34 @@ bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return NumBits1 > NumBits2;
}
+/// Check if it is profitable to hoist an instruction from a then/else block
+/// into the if-block. Not profitable if I and its user can form an FMA
+/// instruction, because we prefer FMSUB/FMADD.
+bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
+ if (I->getOpcode() != Instruction::FMul)
+ return true;
+
+ if (I->getNumUses() != 1)
+ return true;
+
+ Instruction *User = I->user_back();
+
+ if (User &&
+ !(User->getOpcode() == Instruction::FSub ||
+ User->getOpcode() == Instruction::FAdd))
+ return true;
+
+ const TargetOptions &Options = getTargetMachine().Options;
+ EVT VT = getValueType(User->getOperand(0)->getType());
+
+ if (isFMAFasterThanFMulAndFAdd(VT) &&
+ isOperationLegalOrCustom(ISD::FMA, VT) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath))
+ return false;
+
+ return true;
+}
+
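A plain C++ illustration of what the hook protects (hedged; actual selection depends on the fast-math settings checked above): kept in the same block as its FSub user, the multiply folds into one FMSUB, whereas hoisting it above the branch leaves a separate FMUL plus an FSUB on each path.

    // Selectable as a single FMSUB when FP contraction is allowed.
    double fusable(double A, double B, double C) {
      return C - A * B;
    }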
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
@@ -6604,8 +6625,7 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
bool Fast;
const Function *F = MF.getFunction();
if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat) &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
(allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
return MVT::f128;
@@ -6948,7 +6968,8 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
return SDValue();
}
-static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
@@ -6967,7 +6988,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
// conversion, use an FP load and an AdvSIMD scalar {S|U}CVTF instead.
// This eliminates an integer-to-vector-move UOP and improves throughput.
SDValue N0 = N->getOperand(0);
- if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -7756,9 +7777,9 @@ static SDValue performExtendCombine(SDNode *N,
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getIntPtrConstant(0));
+ DAG.getConstant(0, MVT::i64));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ DAG.getConstant(InNVT.getVectorNumElements(), MVT::i64));
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
@@ -7839,14 +7860,13 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
// Cyclone has bad performance on unaligned 16B stores when crossing line and
- // page boundries. We want to split such stores.
+ // page boundaries. We want to split such stores.
if (!Subtarget->isCyclone())
return SDValue();
// Don't split at Oz.
MachineFunction &MF = DAG.getMachineFunction();
- bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (IsMinSize)
return SDValue();
@@ -7880,9 +7900,9 @@ static SDValue performSTORECombine(SDNode *N,
EVT HalfVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getIntPtrConstant(0));
+ DAG.getConstant(0, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getIntPtrConstant(NumElts));
+ DAG.getConstant(NumElts, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
@@ -7973,7 +7993,7 @@ static SDValue performPostLD1Combine(SDNode *N,
LoadSDN->getMemOperand());
// Update the uses.
- std::vector<SDValue> NewResults;
+ SmallVector<SDValue, 2> NewResults;
NewResults.push_back(SDValue(LD, 0)); // The result of load
NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain
DCI.CombineTo(LD, NewResults);
@@ -8478,6 +8498,12 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
// largest real NEON comparison is 64 bits per lane, which means the result is
// at most 32 bits and an illegal vector. Just bail out for now.
EVT SrcVT = N0.getOperand(0).getValueType();
+
+ // Don't try to do this optimization when the setcc itself has i1 operands.
+ // There are no legal vectors of i1, so this would be pointless.
+ if (SrcVT == MVT::i1)
+ return SDValue();
+
int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
if (!ResVT.isVector() || NumMaskElts == 0)
return SDValue();
@@ -8518,7 +8544,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- return performIntToFpCombine(N, DAG);
+ return performIntToFpCombine(N, DAG, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
@@ -8696,13 +8722,12 @@ bool AArch64TargetLowering::getPostIndexedAddressParts(
static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
- if (N->getValueType(0) != MVT::i16)
- return;
-
SDLoc DL(N);
SDValue Op = N->getOperand(0);
- assert(Op.getValueType() == MVT::f16 &&
- "Inconsistent bitcast? Only 16-bit types should be i16 or f16");
+
+ if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16)
+ return;
+
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
@@ -8732,6 +8757,12 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const {
return true;
}
+bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal if there are three or more FDIVs.
+ return NumUsers > 2;
+}
+
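What the new threshold buys, sketched in plain C++ (the combine itself only fires under relaxed FP math, since multiplying by a reciprocal is not bit-exact division):

    // Three or more divisions by the same value: one FDIV to form the
    // reciprocal, then one cheap FMUL per use.
    void scaleAll(float *Out, const float *In, int N, float D) {
      float R = 1.0f / D;
      for (int I = 0; I < N; ++I)
        Out[I] = In[I] * R;
    }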
TargetLoweringBase::LegalizeTypeAction
AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
MVT SVT = VT.getSimpleVT();
@@ -8836,3 +8867,8 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Val, Stxr->getFunctionType()->getParamType(0)),
Addr);
}
+
+bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ return Ty->isArrayTy();
+}
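A sketch of the kind of argument the new hook targets (illustrative; whether a given type is passed this way depends on the full AAPCS64 rules): an array aggregate whose pieces must land in consecutive registers rather than being assigned independently.

    // An IR-level [4 x float] argument; AAPCS64 wants its elements in
    // consecutive FP registers (e.g. s0..s3), hence the hook returns true
    // for array types.
    struct Quad { float V[4]; };

    float sumQuad(Quad Q) {
      return Q.V[0] + Q.V[1] + Q.V[2] + Q.V[3];
    }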
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 2f5708d..e973364 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -207,7 +208,8 @@ class AArch64TargetLowering : public TargetLowering {
bool RequireStrictAlign;
public:
- explicit AArch64TargetLowering(const TargetMachine &TM);
+ explicit AArch64TargetLowering(const TargetMachine &TM,
+ const AArch64Subtarget &STI);
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
@@ -222,7 +224,7 @@ public:
MVT getScalarShiftAmountTy(EVT LHSTy) const override;
/// allowsMisalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses. of the specified type.
+ /// unaligned memory accesses of the specified type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
unsigned Align = 1,
bool *Fast = nullptr) const override {
@@ -244,10 +246,6 @@ public:
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned getFunctionAlignment(const Function *F) const;
- /// getMaximalGlobalOffset - Returns the maximal possible offset which can
- /// be used for loads / stores from the global.
- unsigned getMaximalGlobalOffset() const override;
-
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Addrspacecasts are always noops.
@@ -285,6 +283,8 @@ public:
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
+ bool isProfitableToHoist(Instruction *I) const override;
+
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
@@ -440,6 +440,7 @@ private:
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
ConstraintType
getConstraintType(const std::string &Constraint) const override;
@@ -452,7 +453,8 @@ private:
const char *constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
@@ -472,6 +474,10 @@ private:
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+
+ bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
+ CallingConv::ID CallConv,
+ bool isVarArg) const override;
};
namespace AArch64 {
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 2b0f5d2..d295c02 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -4383,7 +4383,7 @@ class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
}
multiclass SIMDVectorLShiftLongBySizeBHS {
- let neverHasSideEffects = 1 in {
+ let hasSideEffects = 0 in {
def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64,
"shll", ".8h", ".8b", "8">;
def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 2dbb31c..64cec55 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64MachineCombinerPattern.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "AArch64MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -707,9 +707,8 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
assert(MBB && "Can't get MachineBasicBlock here");
MachineFunction *MF = MBB->getParent();
assert(MF && "Can't get MachineFunction here");
- const TargetMachine *TM = &MF->getTarget();
- const TargetInstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
- const TargetRegisterInfo *TRI = TM->getSubtargetImpl()->getRegisterInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 30bf650..d8f1274 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -16,8 +16,8 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 252ed40..6e4c0b0 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -481,6 +481,24 @@ def trunc_imm : SDNodeXForm<imm, [{
def : Pat<(i64 i64imm_32bit:$src),
(SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
+// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
+def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32);
+}]>;
+
+def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64);
+}]>;
+
+
+def : Pat<(f32 fpimm:$in),
+ (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
+def : Pat<(f64 fpimm:$in),
+ (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
+
+
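A standalone model of what bitcast_fpimm_to_i64 computes (plain C++, not the TableGen machinery): the constant's IEEE-754 bits are moved through a GPR via a MOVZ/MOVK sequence and then copied into an FP register, which is what lets the large code model avoid a constant-pool load.

    #include <cstdint>
    #include <cstring>

    uint64_t fpimmBits(double D) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof Bits);  // the f64 -> i64 bitcast
      return Bits;                          // fed to MOVi64imm in the pattern
    }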
// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
@@ -639,6 +657,10 @@ def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
+def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
+ (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
+def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
+ (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 7
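The two added patterns are the same negated product written two ways; in two's-complement arithmetic -(A*B) equals (-A)*B, and both now select MSUB with the zero register as accumulator (the MNEG alias). Illustrative C++, signed-overflow caveats aside:

    #include <cstdint>

    int64_t negMulA(int64_t A, int64_t B) { return -(A * B); }
    int64_t negMulB(int64_t A, int64_t B) { return (-A) * B; }  // same value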
let AddedComplexity = 5 in {
@@ -789,7 +811,7 @@ def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
(EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
@@ -804,7 +826,7 @@ def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
@@ -977,9 +999,9 @@ def : InstAlias<"cneg $dst, $src, $cc",
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
-let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in {
+let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR : ADRI<0, "adr", adrlabel, []>;
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
def ADRP : ADRI<1, "adrp", adrplabel,
[(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
@@ -1867,6 +1889,33 @@ let Predicates = [IsLE] in {
}
} // AddedComplexity = 10
+// Match stores from lane 0 to the appropriate subreg's store.
+multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
+ ValueType VecTy, ValueType STy,
+ SubRegIndex SubRegIdx,
+ Instruction STRW, Instruction STRX> {
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
+
+let AddedComplexity = 19 in {
+ defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro16, store , v8i16, i16, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro32, truncstorei32, v4i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store , v4i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store , v4f32, f32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro64, store , v2i64, i64, dsub, STRDroW, STRDroX>;
+ defm : VecROStoreLane0Pat<ro64, store , v2f64, f64, dsub, STRDroW, STRDroX>;
+}
+
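What these patterns exploit, in an illustrative sketch: lane 0 of a 128-bit NEON register is exactly its low subregister, so storing it needs no lane extract, only a subregister store with the register-offset addressing mode.

    #include <cstdint>

    void storeFirstLane(const uint32_t Vec[4], uint32_t *P) {
      *P = Vec[0];  // ideally one STR of the s-subregister, no UMOV first
    }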
//---
// (unsigned immediate)
defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
@@ -3667,29 +3716,21 @@ defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>;
// Floating point vector extractions are codegen'd as either a sequence of
-// subregister extractions, possibly fed by an INS if the lane number is
-// anything other than zero.
+// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
+// the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
(f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
+
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
- (f64 (EXTRACT_SUBREG
- (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexD:$idx),
- dsub))>;
+ (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
- (f32 (EXTRACT_SUBREG
- (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexS:$idx),
- ssub))>;
+ (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
- (f16 (EXTRACT_SUBREG
- (INSvi16lane (v8f16 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexH:$idx),
- hsub))>;
+ (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
@@ -4124,7 +4165,7 @@ def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
// AdvSIMD indexed element
//----------------------------------------------------------------------------
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">;
defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">;
}
@@ -4678,7 +4719,7 @@ defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>;
@@ -4768,7 +4809,7 @@ defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
-let AddedComplexity = 15 in
+let AddedComplexity = 19 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
@@ -4784,7 +4825,7 @@ def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>;
-let AddedComplexity = 15 in
+let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
@@ -4848,7 +4889,7 @@ defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8157981..8463ce6 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -135,6 +135,8 @@ static bool isUnscaledLdst(unsigned Opc) {
return true;
case AArch64::LDURXi:
return true;
+ case AArch64::LDURSWi:
+ return true;
}
}
@@ -173,6 +175,9 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
case AArch64::LDRXui:
case AArch64::LDURXi:
return 8;
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ return 4;
}
}
@@ -210,6 +215,9 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::LDRXui:
case AArch64::LDURXi:
return AArch64::LDPXi;
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ return AArch64::LDPSWi;
}
}
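A source-level pattern the new LDRSW entries can now pair (illustrative): two adjacent sign-extending word loads become a single LDPSW.

    #include <cstdint>

    int64_t sumAdjacent(const int32_t *P) {
      return (int64_t)P[0] + (int64_t)P[1];  // ldrsw + ldrsw -> ldpsw
    }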
@@ -237,6 +245,8 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
return AArch64::LDRWpre;
case AArch64::LDRXui:
return AArch64::LDRXpre;
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWpre;
}
}
@@ -264,6 +274,8 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
return AArch64::LDRWpost;
case AArch64::LDRXui:
return AArch64::LDRXpost;
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWpost;
}
}
@@ -780,6 +792,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDRQui:
case AArch64::LDRXui:
case AArch64::LDRWui:
+ case AArch64::LDRSWui:
// do the unscaled versions as well
case AArch64::STURSi:
case AArch64::STURDi:
@@ -790,7 +803,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDURDi:
case AArch64::LDURQi:
case AArch64::LDURWi:
- case AArch64::LDURXi: {
+ case AArch64::LDURXi:
+ case AArch64::LDURSWi: {
// If this is a volatile load/store, don't mess with it.
if (MI->hasOrderedMemoryRef()) {
++MBBI;
@@ -931,10 +945,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- const TargetMachine &TM = Fn.getTarget();
- TII = static_cast<const AArch64InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ TRI = Fn.getSubtarget().getRegisterInfo();
bool Modified = false;
for (auto &MBB : Fn)
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index f942c4e..4690177 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -235,7 +235,7 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
costs[i + 1][j + 1] = sameParityMax + 1.0;
}
}
- G.setEdgeCosts(edge, std::move(costs));
+ G.updateEdgeCosts(edge, std::move(costs));
return true;
}
@@ -312,7 +312,7 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
costs[i + 1][j + 1] = sameParityMax + 1.0;
}
}
- G.setEdgeCosts(edge, std::move(costs));
+ G.updateEdgeCosts(edge, std::move(costs));
}
}
}
@@ -328,7 +328,7 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) {
const MachineFunction &MF = G.getMetadata().MF;
LiveIntervals &LIs = G.getMetadata().LIS;
- TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
DEBUG(MF.dump());
for (const auto &MBB: MF) {
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 16c33b7..c037c86 100644
--- a/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -22,7 +22,7 @@
#include "AArch64.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
@@ -31,6 +31,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
@@ -112,44 +113,42 @@ private:
AU.addPreserved<DominatorTreeWrapperPass>();
}
- /// Type to store a list of User.
- typedef SmallVector<Value::user_iterator, 4> Users;
+ /// Type to store a list of Uses.
+ typedef SmallVector<Use *, 4> Uses;
/// Map an insertion point to all the uses it dominates.
- typedef DenseMap<Instruction *, Users> InsertionPoints;
+ typedef DenseMap<Instruction *, Uses> InsertionPoints;
/// Map a function to the required insertion points of the loads for a
/// global variable.
typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc;
/// Find the closest point that dominates the given Use.
- Instruction *findInsertionPoint(Value::user_iterator &Use);
+ Instruction *findInsertionPoint(Use &Use);
/// Check if the given insertion point is dominated by an existing
/// insertion point.
/// If true, the given use is added to the list of dominated uses for
/// the related existing point.
/// \param NewPt the insertion point to be checked
- /// \param UseIt the use to be added into the list of dominated uses
+ /// \param Use the use to be added into the list of dominated uses
/// \param InsertPts existing insertion points
/// \pre NewPt and all instructions in InsertPts belong to the same function
/// \return true if one of the insertion points in InsertPts dominates NewPt,
/// false otherwise
- bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt,
- InsertionPoints &InsertPts);
+ bool isDominated(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts);
/// Check if the given insertion point can be merged with an existing
/// insertion point in a common dominator.
/// If true, the given use is added to the list of the created insertion
/// point.
/// \param NewPt the insertion point to be checked
- /// \param UseIt the use to be added into the list of dominated uses
+ /// \param Use the use to be added into the list of dominated uses
/// \param InsertPts existing insertion points
/// \pre NewPt and all instructions in InsertPts belong to the same function
/// \pre isDominated returns false for the exact same parameters.
/// \return true if there exists an insertion point in InsertPts that could
/// have been merged with NewPt in a common dominator,
/// false otherwise
- bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt,
- InsertionPoints &InsertPts);
+ bool tryAndMerge(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts);
/// Compute the minimal insertion points that dominate all the interesting
/// uses of the value.
@@ -182,21 +181,19 @@ private:
bool promoteConstant(Constant *Cst);
/// Transfer the list of dominated uses of IPI to NewPt in InsertPts.
- /// Append UseIt to this list and delete the entry of IPI in InsertPts.
- static void appendAndTransferDominatedUses(Instruction *NewPt,
- Value::user_iterator &UseIt,
+ /// Append Use to this list and delete the entry of IPI in InsertPts.
+ static void appendAndTransferDominatedUses(Instruction *NewPt, Use &Use,
InsertionPoints::iterator &IPI,
InsertionPoints &InsertPts) {
// Record the dominated use.
- IPI->second.push_back(UseIt);
+ IPI->second.push_back(&Use);
// Transfer the dominated uses of IPI to NewPt.
// Inserting into the DenseMap may invalidate existing iterators.
// Keep a copy of the key to find the iterator to erase.
Instruction *OldInstr = IPI->first;
- InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second));
+ InsertPts[NewPt] = std::move(IPI->second);
// Erase IPI.
- IPI = InsertPts.find(OldInstr);
- InsertPts.erase(IPI);
+ InsertPts.erase(OldInstr);
}
};
} // end anonymous namespace
@@ -328,23 +325,18 @@ static bool shouldConvert(const Constant *Cst) {
return isConstantUsingVectorTy(Cst->getType());
}
-Instruction *
-AArch64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
+Instruction *AArch64PromoteConstant::findInsertionPoint(Use &Use) {
+ Instruction *User = cast<Instruction>(Use.getUser());
+
// If this user is a phi, the insertion point is in the related
// incoming basic block.
- PHINode *PhiInst = dyn_cast<PHINode>(*Use);
- Instruction *InsertionPoint;
- if (PhiInst)
- InsertionPoint =
- PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
- else
- InsertionPoint = dyn_cast<Instruction>(*Use);
- assert(InsertionPoint && "User is not an instruction!");
- return InsertionPoint;
+ if (PHINode *PhiInst = dyn_cast<PHINode>(User))
+ return PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
+
+ return User;
}
-bool AArch64PromoteConstant::isDominated(Instruction *NewPt,
- Value::user_iterator &UseIt,
+bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Use &Use,
InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
@@ -363,15 +355,14 @@ bool AArch64PromoteConstant::isDominated(Instruction *NewPt,
DEBUG(dbgs() << "Insertion point dominated by:\n");
DEBUG(IPI.first->print(dbgs()));
DEBUG(dbgs() << '\n');
- IPI.second.push_back(UseIt);
+ IPI.second.push_back(&Use);
return true;
}
}
return false;
}
-bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
- Value::user_iterator &UseIt,
+bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Use &Use,
InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*NewPt->getParent()->getParent()).getDomTree();
@@ -391,7 +382,7 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
DEBUG(dbgs() << "Merge insertion point with:\n");
DEBUG(IPI->first->print(dbgs()));
DEBUG(dbgs() << "\nat considered insertion point.\n");
- appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+ appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts);
return true;
}
@@ -415,7 +406,7 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
DEBUG(dbgs() << '\n');
DEBUG(NewPt->print(dbgs()));
DEBUG(dbgs() << '\n');
- appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+ appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts);
return true;
}
return false;
@@ -424,22 +415,22 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
void AArch64PromoteConstant::computeInsertionPoints(
Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
DEBUG(dbgs() << "** Compute insertion points **\n");
- for (Value::user_iterator UseIt = Val->user_begin(),
- EndUseIt = Val->user_end();
- UseIt != EndUseIt; ++UseIt) {
+ for (Use &Use : Val->uses()) {
+ Instruction *User = dyn_cast<Instruction>(Use.getUser());
+
// If the user is not an Instruction, we cannot modify it.
- if (!isa<Instruction>(*UseIt))
+ if (!User)
continue;
// Filter out uses that should not be converted.
- if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo()))
+ if (!shouldConvertUse(Val, User, Use.getOperandNo()))
continue;
- DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n");
- DEBUG((*UseIt)->print(dbgs()));
+ DEBUG(dbgs() << "Considered use, opidx " << Use.getOperandNo() << ":\n");
+ DEBUG(User->print(dbgs()));
DEBUG(dbgs() << '\n');
- Instruction *InsertionPoint = findInsertionPoint(UseIt);
+ Instruction *InsertionPoint = findInsertionPoint(Use);
DEBUG(dbgs() << "Considered insertion point:\n");
DEBUG(InsertionPoint->print(dbgs()));
@@ -449,17 +440,17 @@ void AArch64PromoteConstant::computeInsertionPoints(
// by another one.
InsertionPoints &InsertPts =
InsPtsPerFunc[InsertionPoint->getParent()->getParent()];
- if (isDominated(InsertionPoint, UseIt, InsertPts))
+ if (isDominated(InsertionPoint, Use, InsertPts))
continue;
// This insertion point is useful; check if we can merge some insertion
// point in a common dominator or if NewPt dominates an existing one.
- if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
+ if (tryAndMerge(InsertionPoint, Use, InsertPts))
continue;
DEBUG(dbgs() << "Keep considered insertion point\n");
// It is definitely useful on its own.
- InsertPts[InsertionPoint].push_back(UseIt);
+ InsertPts[InsertionPoint].push_back(&Use);
}
}
@@ -470,41 +461,32 @@ bool AArch64PromoteConstant::insertDefinitions(
bool HasChanged = false;
// Traverse all insertion points in all the functions.
- for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
- EndIt = InsPtsPerFunc.end();
- FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
- InsertionPoints &InsertPts = FctToInstPtsIt->second;
+ for (const auto &FctToInstPtsIt : InsPtsPerFunc) {
+ const InsertionPoints &InsertPts = FctToInstPtsIt.second;
// Do more checking for debug purposes.
#ifndef NDEBUG
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
- *FctToInstPtsIt->first).getDomTree();
+ *FctToInstPtsIt.first).getDomTree();
#endif
- GlobalVariable *PromotedGV;
assert(!InsertPts.empty() && "Empty uses does not need a definition");
- Module *M = FctToInstPtsIt->first->getParent();
- DenseMap<Module *, GlobalVariable *>::iterator MapIt =
- ModuleToMergedGV.find(M);
- if (MapIt == ModuleToMergedGV.end()) {
+ Module *M = FctToInstPtsIt.first->getParent();
+ GlobalVariable *&PromotedGV = ModuleToMergedGV[M];
+ if (!PromotedGV) {
PromotedGV = new GlobalVariable(
*M, Cst->getType(), true, GlobalValue::InternalLinkage, nullptr,
"_PromotedConst", nullptr, GlobalVariable::NotThreadLocal);
PromotedGV->setInitializer(Cst);
- ModuleToMergedGV[M] = PromotedGV;
DEBUG(dbgs() << "Global replacement: ");
DEBUG(PromotedGV->print(dbgs()));
DEBUG(dbgs() << '\n');
++NumPromoted;
HasChanged = true;
- } else {
- PromotedGV = MapIt->second;
}
- for (InsertionPoints::iterator IPI = InsertPts.begin(),
- EndIPI = InsertPts.end();
- IPI != EndIPI; ++IPI) {
+ for (const auto &IPI : InsertPts) {
// Create the load of the global variable.
- IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
+ IRBuilder<> Builder(IPI.first->getParent(), IPI.first);
LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
DEBUG(dbgs() << "**********\n");
DEBUG(dbgs() << "New def: ");
@@ -512,18 +494,15 @@ bool AArch64PromoteConstant::insertDefinitions(
DEBUG(dbgs() << '\n');
// Update the dominated uses.
- Users &DominatedUsers = IPI->second;
- for (Value::user_iterator Use : DominatedUsers) {
+ for (Use *Use : IPI.second) {
#ifndef NDEBUG
- assert((DT.dominates(LoadedCst, cast<Instruction>(*Use)) ||
- (isa<PHINode>(*Use) &&
- DT.dominates(LoadedCst, findInsertionPoint(Use)))) &&
+ assert(DT.dominates(LoadedCst, findInsertionPoint(*Use)) &&
"Inserted definition does not dominate all its uses!");
#endif
- DEBUG(dbgs() << "Use to update " << Use.getOperandNo() << ":");
- DEBUG(Use->print(dbgs()));
+ DEBUG(dbgs() << "Use to update " << Use->getOperandNo() << ":");
+ DEBUG(Use->getUser()->print(dbgs()));
DEBUG(dbgs() << '\n');
- Use->setOperand(Use.getOperandNo(), LoadedCst);
+ Use->set(LoadedCst);
++NumPromotedUses;
}
}
@@ -556,22 +535,19 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) {
// global variable. Create as few loads of this variable as possible and
// update the uses accordingly.
bool LocalChange = false;
- SmallSet<Constant *, 8> AlreadyChecked;
-
- for (auto &MBB : F) {
- for (auto &MI : MBB) {
- // Traverse the operand, looking for constant vectors. Replace them by a
- // load of a global variable of constant vector type.
- for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands();
- OpIdx != EndOpIdx; ++OpIdx) {
- Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx));
- // There is no point in promoting global values as they are already
- // global. Do not promote constant expressions either, as they may
- // require some code expansion.
- if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
- AlreadyChecked.insert(Cst).second)
- LocalChange |= promoteConstant(Cst);
- }
+ SmallPtrSet<Constant *, 8> AlreadyChecked;
+
+ for (Instruction &I : inst_range(&F)) {
+ // Traverse the operands, looking for constant vectors, and replace each
+ // with a load of a global variable of constant vector type.
+ for (Value *Op : I.operand_values()) {
+ Constant *Cst = dyn_cast<Constant>(Op);
+ // There is no point in promoting global values as they are already
+ // global. Do not promote constant expressions either, as they may
+ // require some code expansion.
+ if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
+ AlreadyChecked.insert(Cst).second)
+ LocalChange |= promoteConstant(Cst);
}
}
return LocalChange;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index d734d43..206cdbb 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -33,6 +33,10 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"
+static cl::opt<bool>
ReserveX18("aarch64-reserve-x18", cl::Hidden,
+ cl::desc("Reserve X18, making it unavailable as a GPR"));
+
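A usage note (hypothetical invocation; the option is cl::Hidden, so it only shows up under -help-hidden): passing the new flag to llc reserves X18 even on targets where it is not the platform register, e.g.

    llc -mtriple=aarch64-linux-gnu -aarch64-reserve-x18 input.ll -o out.s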
AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
const AArch64Subtarget *sti)
: AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
@@ -40,6 +44,10 @@ AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::GHC)
+ // The GHC set of callee-saved regs is empty, as all those regs are
+ // used for passing STG regs around.
+ return CSR_AArch64_NoRegs_SaveList;
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
else
@@ -48,6 +56,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t *
AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ if (CC == CallingConv::GHC)
+ // This is academic because all GHC calls are (supposed to be) tail calls
+ return CSR_AArch64_NoRegs_RegMask;
if (CC == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_RegMask;
else
@@ -63,7 +74,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
}
const uint32_t *
-AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
+AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i64 argument (which must also be the register used to return a
@@ -71,6 +82,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
//
// In case the calling convention does not use the same register for
// both, the function should return NULL (this does not currently apply).
+ assert(CC != CallingConv::GHC && "should not be GHC calling convention.");
return CSR_AArch64_AAPCS_ThisReturn_RegMask;
}
@@ -90,7 +102,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AArch64::W29);
}
- if (STI->isTargetDarwin()) {
+ if (STI->isTargetDarwin() || ReserveX18) {
Reserved.set(AArch64::X18); // Platform register
Reserved.set(AArch64::W18);
}
@@ -117,7 +129,7 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case AArch64::X18:
case AArch64::W18:
- return STI->isTargetDarwin();
+ return STI->isTargetDarwin() || ReserveX18;
case AArch64::FP:
case AArch64::W29:
return TFI->hasFP(MF) || STI->isTargetDarwin();
@@ -379,7 +391,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64commonRegClassID:
return 32 - 1 // XZR/SP
- (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
- - STI->isTargetDarwin() // X18 reserved as platform register
+ - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register
- hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 0cfd582..b9c5399 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -28,15 +28,14 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
const char *bzeroEntry =
- (V && V->isNullValue())
- ? DAG.getTarget().getSubtarget<AArch64Subtarget>().getBZeroEntry()
- : nullptr;
+ (V && V->isNullValue()) ? STI.getBZeroEntry() : nullptr;
// For small sizes (< 256), it is not beneficial to use bzero
// instead of memset.
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
- const AArch64TargetLowering &TLI =
- *DAG.getTarget().getSubtarget<AArch64Subtarget>().getTargetLowering();
+ const AArch64TargetLowering &TLI = *STI.getTargetLowering();
EVT IntPtr = TLI.getPointerTy();
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 0c36e8f..85b44a2 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -30,7 +30,6 @@ class AArch64StorePairSuppress : public MachineFunctionPass {
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
- MachineFunction *MF;
TargetSchedModel SchedModel;
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
@@ -115,20 +114,16 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
}
}
-bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
- MF = &mf;
- TII =
- static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
- TRI = MF->getSubtarget().getRegisterInfo();
- MRI = &MF->getRegInfo();
- const TargetSubtargetInfo &ST =
- MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
+ TRI = ST.getRegisterInfo();
+ MRI = &MF.getRegInfo();
SchedModel.init(ST.getSchedModel(), &ST, TII);
-
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n');
+ DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n');
if (!SchedModel.hasInstrSchedModel()) {
DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
@@ -139,7 +134,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
// precisely determine whether a store pair can be formed. But we do want to
// filter out most situations where we can't form store pairs to avoid
// computing trace metrics in those cases.
- for (auto &MBB : *MF) {
+ for (auto &MBB : MF) {
bool SuppressSTP = false;
unsigned PrevBaseReg = 0;
for (auto &MI : MBB) {
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 47b5d54..c613025 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -48,17 +48,10 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
- TargetTriple(TT),
- // This nested ternary is horrible, but DL needs to be properly
- // initialized
- // before TLInfo is constructed.
- DL(isTargetMachO()
- ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
- : "E-m:e-i64:64-i128:128-n32:64-S128")),
- FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)),
- TSInfo(&DL), TLInfo(TM) {}
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS)),
+ TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index e2740f1..d418cc5 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -48,13 +48,14 @@ protected:
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing;
+ bool IsLittle;
+
/// CPUString - String name of used CPU.
std::string CPUString;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
- const DataLayout DL;
AArch64FrameLowering FrameLowering;
AArch64InstrInfo InstrInfo;
AArch64SelectionDAGInfo TSInfo;
@@ -82,7 +83,6 @@ public:
return &TLInfo;
}
const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const AArch64RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
@@ -100,7 +100,7 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool isLittleEndian() const { return DL.isLittleEndian(); }
+ bool isLittleEndian() const { return IsLittle; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index beed8e0..d73d0b3 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -13,10 +13,11 @@
#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
+#include "AArch64TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/Function.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
@@ -112,6 +113,13 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool LittleEndian)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ // This nested ternary is horrible, but DL needs to be properly
+ // initialized before TLInfo is constructed.
+ DL(Triple(TT).isOSBinFormatMachO()
+ ? "e-m:o-i64:64-i128:128-n32:64-S128"
+ : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
+ : "E-m:e-i64:64-i128:128-n32:64-S128")),
TLOF(createTLOF(Triple(getTargetTriple()))),
Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) {
initAsmInfo();
@@ -121,11 +129,8 @@ AArch64TargetMachine::~AArch64TargetMachine() {}
const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -181,19 +186,17 @@ public:
bool addPreISel() override;
bool addInstSelector() override;
bool addILPOpts() override;
- bool addPreRegAlloc() override;
- bool addPostRegAlloc() override;
- bool addPreSched2() override;
- bool addPreEmitPass() override;
+ void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
};
} // namespace
-void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our AArch64 pass. This
- // allows the AArch64 pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createAArch64TargetTransformInfoPass(this));
+TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(AArch64TTIImpl(this, F));
+ });
}
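For readers following the wider TTI rework: rather than the target pushing ImmutablePasses into the pass manager, a consumer now asks the target machine for a TargetIRAnalysis and wraps it itself. A rough sketch of the call site, assuming the wrapper-pass API introduced alongside this rework:

    llvm::legacy::PassManager PM;
    // The wrapper invokes the lambda above per function, yielding an
    // AArch64TTIImpl-backed TargetTransformInfo on demand.
    PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));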
TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
@@ -233,8 +236,11 @@ bool AArch64PassConfig::addPreISel() {
// get a chance to be merged
if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
addPass(createAArch64PromoteConstantPass());
+ // FIXME: On AArch64, this depends on the type.
+ // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
+ // and the offset has to be a multiple of the related size in bytes.
if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createGlobalMergePass(TM));
+ addPass(createGlobalMergePass(TM, 4095));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
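The 4095 bound passed to GlobalMerge mirrors the unsigned, scaled 12-bit immediate in AArch64 load/store addressing. A small illustration (not part of the patch):

    // Reachable [base + imm] range for a scaled unsigned 12-bit offset:
    // the maximum byte offset is 4095 * element size, and the offset must
    // be a multiple of that element size.
    unsigned maxScaledOffset(unsigned EltSizeInBytes) {
      return 4095 * EltSizeInBytes; // e.g. 16380 for i32, 32760 for i64
    }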
@@ -246,7 +252,7 @@ bool AArch64PassConfig::addInstSelector() {
// For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
// references to _TLS_MODULE_BASE_ as possible).
- if (TM->getSubtarget<AArch64Subtarget>().isTargetELF() &&
+ if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
getOptLevel() != CodeGenOpt::None)
addPass(createAArch64CleanupLocalDynamicTLSPass());
@@ -267,7 +273,7 @@ bool AArch64PassConfig::addILPOpts() {
return true;
}
-bool AArch64PassConfig::addPreRegAlloc() {
+void AArch64PassConfig::addPreRegAlloc() {
// Use AdvSIMD scalar instructions whenever profitable.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
@@ -275,10 +281,9 @@ bool AArch64PassConfig::addPreRegAlloc() {
// be register coalescer friendly.
addPass(&PeepholeOptimizerID);
}
- return true;
}
-bool AArch64PassConfig::addPostRegAlloc() {
+void AArch64PassConfig::addPostRegAlloc() {
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
@@ -288,26 +293,23 @@ bool AArch64PassConfig::addPostRegAlloc() {
usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
- return true;
}
-bool AArch64PassConfig::addPreSched2() {
+void AArch64PassConfig::addPreSched2() {
// Expand some pseudo instructions to allow proper scheduling.
addPass(createAArch64ExpandPseudoPass());
// Use load/store pair instructions when possible.
if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
- return true;
}
-bool AArch64PassConfig::addPreEmitPass() {
+void AArch64PassConfig::addPreEmitPass() {
if (EnableA53Fix835769)
addPass(createAArch64A53Fix835769());
// Relax conditional branch instructions if they're otherwise out of
// range of their destination.
addPass(createAArch64BranchRelaxation());
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
- TM->getSubtarget<AArch64Subtarget>().isTargetMachO())
+ Triple(TM->getTargetTriple()).isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
- return true;
}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 75c65c5..7143adf 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -23,6 +23,7 @@ namespace llvm {
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
+ const DataLayout DL;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AArch64Subtarget Subtarget;
mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
@@ -35,6 +36,7 @@ public:
~AArch64TargetMachine() override;
+ const DataLayout *getDataLayout() const override { return &DL; }
const AArch64Subtarget *getSubtargetImpl() const override {
return &Subtarget;
}
@@ -43,8 +45,8 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- /// \brief Register AArch64 analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ /// \brief Get the TargetIRAnalysis for this target.
+ TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile* getObjFileLowering() const override {
return TLOF.get();
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b1a2914..0646d85 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
+//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,18 +6,11 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements a TargetTransformInfo analysis pass specific to the
-/// AArch64 target machine. It uses the target's detailed information to provide
-/// more precise answers to certain TTI queries, while letting the target
-/// independent and default TTI implementations handle the rest.
-///
-//===----------------------------------------------------------------------===//
-#include "AArch64.h"
-#include "AArch64TargetMachine.h"
+#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -26,130 +19,10 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64tti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeAArch64TTIPass(PassRegistry &);
-}
-
-namespace {
-
-class AArch64TTI final : public ImmutablePass, public TargetTransformInfo {
- const AArch64TargetMachine *TM;
- const AArch64Subtarget *ST;
- const AArch64TargetLowering *TLI;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
- AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- AArch64TTI(const AArch64TargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override { pushTTIStack(this); }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo *)this;
- return this;
- }
-
- /// \name Scalar TTI Implementations
- /// @{
- unsigned getIntImmCost(int64_t Val) const;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
-
- /// @}
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 32;
- return 0;
- }
- return 31;
- }
-
- unsigned getRegisterBitWidth(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 128;
- return 0;
- }
- return 64;
- }
-
- unsigned getMaxInterleaveFactor() const override;
-
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
- override;
-
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
- override;
-
- unsigned getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue,
- OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const override;
-
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
- override;
-
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
-
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;
-
- void getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const override;
-
-
- /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti",
- "AArch64 Target Transform Info", true, true, false)
-char AArch64TTI::ID = 0;
-
-ImmutablePass *
-llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
- return new AArch64TTI(TM);
-}
-
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
-unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
+unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
// Check if the immediate can be encoded within an instruction.
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
return 0;
@@ -163,7 +36,7 @@ unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
}
/// \brief Calculate the cost of materializing the given constant.
-unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -187,25 +60,25 @@ unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
return std::max(1U, Cost);
}
-unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
+unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
unsigned ImmIdx = ~0U;
switch (Opcode) {
default:
- return TCC_Free;
+ return TTI::TCC_Free;
case Instruction::GetElementPtr:
// Always hoist the base address of a GetElementPtr.
if (Idx == 0)
- return 2 * TCC_Basic;
- return TCC_Free;
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
case Instruction::Store:
ImmIdx = 0;
break;
@@ -227,7 +100,7 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
case Instruction::LShr:
case Instruction::AShr:
if (Idx == 1)
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Instruction::Trunc:
case Instruction::ZExt:
@@ -245,26 +118,27 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
if (Idx == ImmIdx) {
unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
- return (Cost <= NumConstants * TCC_Basic)
- ? static_cast<unsigned>(TCC_Free) : Cost;
+ unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TTI::TCC_Basic)
+ ? static_cast<unsigned>(TTI::TCC_Free)
+ : Cost;
}
- return AArch64TTI::getIntImmCost(Imm, Ty);
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
+unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
switch (IID) {
default:
- return TCC_Free;
+ return TTI::TCC_Free;
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -273,35 +147,36 @@ unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
case Intrinsic::umul_with_overflow:
if (Idx == 1) {
unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
- return (Cost <= NumConstants * TCC_Basic)
- ? static_cast<unsigned>(TCC_Free) : Cost;
+ unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TTI::TCC_Basic)
+ ? static_cast<unsigned>(TTI::TCC_Free)
+ : Cost;
}
break;
case Intrinsic::experimental_stackmap:
if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
}
- return AArch64TTI::getIntImmCost(Imm, Ty);
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
-AArch64TTI::PopcntSupportKind
-AArch64TTI::getPopcntSupport(unsigned TyWidth) const {
+TargetTransformInfo::PopcntSupportKind
+AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
if (TyWidth == 32 || TyWidth == 64)
- return PSK_FastHardware;
+ return TTI::PSK_FastHardware;
// TODO: AArch64TargetLowering::LowerCTPOP() supports 128-bit popcount.
- return PSK_Software;
+ return TTI::PSK_Software;
}
-unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
+unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -309,7 +184,7 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
EVT DstTy = TLI->getValueType(Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
// LowerVectorINT_TO_FP:
@@ -380,11 +255,11 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
if (Idx != -1)
return ConversionTbl[Idx].Cost;
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
+unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Index != -1U) {
@@ -408,10 +283,10 @@ unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
return 2;
}
-unsigned AArch64TTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned AArch64TTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -442,8 +317,8 @@ unsigned AArch64TTI::getArithmeticInstrCost(
switch (ISD) {
default:
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
case ISD::XOR:
@@ -455,7 +330,7 @@ unsigned AArch64TTI::getArithmeticInstrCost(
}
}
-unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -470,8 +345,8 @@ unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
return 1;
}
-unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
+unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower vector selects well that are wider than the register width.
@@ -498,12 +373,12 @@ unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return VectorSelectTbl[Idx].Cost;
}
}
- return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
+unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
@@ -531,7 +406,7 @@ unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
+unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
unsigned Cost = 0;
for (auto *I : Tys) {
if (!I->isVectorTy())
@@ -543,14 +418,94 @@ unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
return Cost;
}
-unsigned AArch64TTI::getMaxInterleaveFactor() const {
+unsigned AArch64TTIImpl::getMaxInterleaveFactor() {
if (ST->isCortexA57())
return 4;
return 2;
}
-void AArch64TTI::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
+void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
}
+
+Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+ Type *ExpectedType) {
+ switch (Inst->getIntrinsicID()) {
+ default:
+ return nullptr;
+ case Intrinsic::aarch64_neon_st2:
+ case Intrinsic::aarch64_neon_st3:
+ case Intrinsic::aarch64_neon_st4: {
+ // Check that the expected type is a struct matching the stored operands.
+ StructType *ST = dyn_cast<StructType>(ExpectedType);
+ if (!ST)
+ return nullptr;
+ unsigned NumElts = Inst->getNumArgOperands() - 1;
+ if (ST->getNumElements() != NumElts)
+ return nullptr;
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
+ return nullptr;
+ }
+ Value *Res = UndefValue::get(ExpectedType);
+ IRBuilder<> Builder(Inst);
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ Value *L = Inst->getArgOperand(i);
+ Res = Builder.CreateInsertValue(Res, L, i);
+ }
+ return Res;
+ }
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld4:
+ if (Inst->getType() == ExpectedType)
+ return Inst;
+ return nullptr;
+ }
+}
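A rough sketch of how a load/store forwarding pass (EarlyCSE is the intended client) might use this hook; the variable names are illustrative:

    // Given a ld2/ld3/ld4 that reads memory last written by a matching
    // st2/st3/st4, rebuild the loaded value from the stored operands.
    if (Value *V = TTI.getOrCreateResultFromMemIntrinsic(
            StoreIntrin, LoadIntrin->getType())) {
      LoadIntrin->replaceAllUsesWith(V);
      LoadIntrin->eraseFromParent();
    }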
+
+bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
+ MemIntrinsicInfo &Info) {
+ switch (Inst->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld4:
+ Info.ReadMem = true;
+ Info.WriteMem = false;
+ Info.Vol = false;
+ Info.NumMemRefs = 1;
+ Info.PtrVal = Inst->getArgOperand(0);
+ break;
+ case Intrinsic::aarch64_neon_st2:
+ case Intrinsic::aarch64_neon_st3:
+ case Intrinsic::aarch64_neon_st4:
+ Info.ReadMem = false;
+ Info.WriteMem = true;
+ Info.Vol = false;
+ Info.NumMemRefs = 1;
+ Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
+ break;
+ }
+
+ switch (Inst->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_st2:
+ Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
+ break;
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_st3:
+ Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
+ break;
+ case Intrinsic::aarch64_neon_ld4:
+ case Intrinsic::aarch64_neon_st4:
+ Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
+ break;
+ }
+ return true;
+}
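And a sketch of how the MatchingId is meant to be consumed when deciding whether two intrinsics touch memory with the same shape (again illustrative, not from the patch):

    MemIntrinsicInfo LdInfo, StInfo;
    if (TTI.getTgtMemIntrinsic(Ld, LdInfo) &&
        TTI.getTgtMemIntrinsic(St, StInfo) &&
        LdInfo.MatchingId == StInfo.MatchingId && LdInfo.PtrVal == StInfo.PtrVal) {
      // Same element count (2/3/4) and same base pointer: the ld* can be
      // satisfied from the st* via getOrCreateResultFromMemIntrinsic above.
    }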
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
new file mode 100644
index 0000000..dd3fd1f
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -0,0 +1,147 @@
+//===-- AArch64TargetTransformInfo.h - AArch64 specific TTI -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific
+/// AArch64 target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+
+namespace llvm {
+
+class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
+ typedef BasicTTIImplBase<AArch64TTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const AArch64TargetMachine *TM;
+ const AArch64Subtarget *ST;
+ const AArch64TargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+
+ const AArch64Subtarget *getST() const { return ST; }
+ const AArch64TargetLowering *getTLI() const { return TLI; }
+
+ enum MemIntrinsicType {
+ VECTOR_LDST_TWO_ELEMENTS,
+ VECTOR_LDST_THREE_ELEMENTS,
+ VECTOR_LDST_FOUR_ELEMENTS
+ };
+
+public:
+ explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F)
+ : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ AArch64TTIImpl(const AArch64TTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST),
+ TLI(Arg.TLI) {}
+ AArch64TTIImpl(AArch64TTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)),
+ ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {}
+ AArch64TTIImpl &operator=(const AArch64TTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ TM = RHS.TM;
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ AArch64TTIImpl &operator=(AArch64TTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ TM = std::move(RHS.TM);
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ using BaseT::getIntImmCost;
+ unsigned getIntImmCost(int64_t Val);
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 32;
+ return 0;
+ }
+ return 31;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+ return 64;
+ }
+
+ unsigned getMaxInterleaveFactor();
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+
+ unsigned getAddressComputationCost(Type *Ty, bool IsComplex);
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
+
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+ Type *ExpectedType);
+
+ bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
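The header's key design choice: BasicTTIImplBase is a CRTP base, so TTI queries resolve statically through the concrete AArch64TTIImpl instead of through virtual dispatch or the old pass machinery. A stripped-down sketch of the pattern (simplified names, illustrative method):

    template <typename T> class BasicTTIImplBaseSketch {
    public:
      unsigned getRegisterCount(bool Vector) {
        // Statically dispatch to the derived class; no virtual call involved.
        return static_cast<T *>(this)->getNumberOfRegisters(Vector);
      }
    };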
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 98e0ea8..1960c99 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -210,9 +210,9 @@ private:
struct SysRegOp {
const char *Data;
unsigned Length;
- uint64_t FeatureBits; // We need to pass through information about which
- // core we are compiling for so that the SysReg
- // Mappers can appropriately conditionalize.
+ uint32_t MRSReg;
+ uint32_t MSRReg;
+ uint32_t PStateField;
};
struct SysCRImmOp {
@@ -374,11 +374,6 @@ public:
return StringRef(SysReg.Data, SysReg.Length);
}
- uint64_t getSysRegFeatureBits() const {
- assert(Kind == k_SysReg && "Invalid access!");
- return SysReg.FeatureBits;
- }
-
unsigned getSysCR() const {
assert(Kind == k_SysCR && "Invalid access!");
return SysCRImm.Val;
@@ -855,28 +850,17 @@ public:
bool isMRSSystemRegister() const {
if (!isSysReg()) return false;
- bool IsKnownRegister;
- auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits());
- Mapper.fromString(getSysReg(), IsKnownRegister);
-
- return IsKnownRegister;
+ return SysReg.MRSReg != -1U;
}
bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
- bool IsKnownRegister;
- auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits());
- Mapper.fromString(getSysReg(), IsKnownRegister);
-
- return IsKnownRegister;
+ return SysReg.MSRReg != -1U;
}
bool isSystemPStateField() const {
if (!isSysReg()) return false;
- bool IsKnownRegister;
- AArch64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister);
-
- return IsKnownRegister;
+ return SysReg.PStateField != -1U;
}
bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
@@ -1454,31 +1438,19 @@ public:
void addMRSSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- bool Valid;
- auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits());
- uint32_t Bits = Mapper.fromString(getSysReg(), Valid);
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
+ Inst.addOperand(MCOperand::CreateImm(SysReg.MRSReg));
}
void addMSRSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- bool Valid;
- auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits());
- uint32_t Bits = Mapper.fromString(getSysReg(), Valid);
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
+ Inst.addOperand(MCOperand::CreateImm(SysReg.MSRReg));
}
void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- bool Valid;
- uint32_t Bits =
- AArch64PState::PStateMapper().fromString(getSysReg(), Valid);
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
+ Inst.addOperand(MCOperand::CreateImm(SysReg.PStateField));
}
void addSysCROperands(MCInst &Inst, unsigned N) const {
@@ -1645,12 +1617,17 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand>
- CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) {
+ static std::unique_ptr<AArch64Operand> CreateSysReg(StringRef Str, SMLoc S,
+ uint32_t MRSReg,
+ uint32_t MSRReg,
+ uint32_t PStateField,
+ MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
- Op->SysReg.FeatureBits = FeatureBits;
+ Op->SysReg.MRSReg = MRSReg;
+ Op->SysReg.MSRReg = MSRReg;
+ Op->SysReg.PStateField = PStateField;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -2643,8 +2620,24 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
- Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), getLoc(),
- STI.getFeatureBits(), getContext()));
+ bool IsKnown;
+ auto MRSMapper = AArch64SysReg::MRSMapper(STI.getFeatureBits());
+ uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), IsKnown);
+ assert(IsKnown == (MRSReg != -1U) &&
+ "register should be -1 if and only if it's unknown");
+
+ auto MSRMapper = AArch64SysReg::MSRMapper(STI.getFeatureBits());
+ uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), IsKnown);
+ assert(IsKnown == (MSRReg != -1U) &&
+ "register should be -1 if and only if it's unknown");
+
+ uint32_t PStateField =
+ AArch64PState::PStateMapper().fromString(Tok.getString(), IsKnown);
+ assert(IsKnown == (PStateField != -1U) &&
+ "register should be -1 if and only if it's unknown");
+
+ Operands.push_back(AArch64Operand::CreateSysReg(
+ Tok.getString(), getLoc(), MRSReg, MSRReg, PStateField, getContext()));
Parser.Lex(); // Eat identifier
return MatchOperand_Success;
@@ -3927,7 +3920,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
llvm_unreachable("Implement any new match types added!");
- return true;
}
/// ParseDirective parses the arm specific directives
@@ -4140,7 +4132,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Consume the EndOfStatement
auto pair = std::make_pair(IsVector, RegNum);
- if (!RegisterReqs.insert(std::make_pair(Name, pair)).second)
+ if (RegisterReqs.insert(std::make_pair(Name, pair)).first->second != pair)
Warning(L, "ignoring redefinition of register alias '" + Name + "'");
return true;
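The practical effect of the insert() change above: re-stating an identical alias is now silently accepted, and only a conflicting redefinition draws the warning. Illustrative directives, written as comments since this is assembler input:

    //   foo .req x1   // defines the alias
    //   foo .req x1   // identical mapping: accepted silently now
    //   foo .req x2   // different mapping: still warned about and ignored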
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 878e29c..fb25089 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -221,13 +221,11 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
static MCSymbolizer *
createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
- LLVMSymbolLookupCallback SymbolLookUp,
- void *DisInfo, MCContext *Ctx,
- MCRelocationInfo *RelInfo) {
- return new llvm::AArch64ExternalSymbolizer(
- *Ctx,
- std::unique_ptr<MCRelocationInfo>(RelInfo),
- GetOpInfo, SymbolLookUp, DisInfo);
+ LLVMSymbolLookupCallback SymbolLookUp,
+ void *DisInfo, MCContext *Ctx,
+ std::unique_ptr<MCRelocationInfo> &&RelInfo) {
+ return new llvm::AArch64ExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
+ SymbolLookUp, DisInfo);
}
extern "C" void LLVMInitializeAArch64Disassembler() {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 1dc506a..ed24343 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -51,7 +51,7 @@ enum ShiftExtendType {
/// getShiftName - Get the string encoding for the shift type.
static inline const char *getShiftExtendName(AArch64_AM::ShiftExtendType ST) {
switch (ST) {
- default: assert(false && "unhandled shift type!");
+ default: llvm_unreachable("unhandled shift type!");
case AArch64_AM::LSL: return "lsl";
case AArch64_AM::LSR: return "lsr";
case AArch64_AM::ASR: return "asr";
@@ -236,21 +236,22 @@ static inline bool processLogicalImmediate(uint64_t Imm, unsigned RegSize,
if (isShiftedMask_64(Imm)) {
I = countTrailingZeros(Imm);
- CTO = CountTrailingOnes_64(Imm >> I);
+ assert(I < 64 && "undefined behavior");
+ CTO = countTrailingOnes(Imm >> I);
} else {
Imm |= ~Mask;
if (!isShiftedMask_64(~Imm))
return false;
- unsigned CLO = CountLeadingOnes_64(Imm);
+ unsigned CLO = countLeadingOnes(Imm);
I = 64 - CLO;
- CTO = CLO + CountTrailingOnes_64(Imm) - (64 - Size);
+ CTO = CLO + countTrailingOnes(Imm) - (64 - Size);
}
// Encode in Immr the number of RORs it would take to get *from* 0^m 1^n
- // to our target value, where i is the number of RORs to go the opposite
+ // to our target value, where I is the number of RORs to go the opposite
// direction.
- assert(Size > I && "I should be smaller than element Size");
+ assert(Size > I && "I should be smaller than element size");
unsigned Immr = (Size - I) & (Size - 1);
// If size has a 1 in the n'th bit, create a value that has zeroes in
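A worked instance of the computation above, with illustrative values:

    //   Imm  = 0x0000000000FF0000  (eight 1s at bits 16..23), Size = 64
    //   I    = countTrailingZeros(Imm)      -> 16
    //   CTO  = countTrailingOnes(Imm >> I)  -> 8
    //   Immr = (Size - I) & (Size - 1)      -> 48
    // Rotating the canonical pattern 0^56 1^8 right by 48 places the run of
    // ones at bits 16..23, recovering Imm.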
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 0bc2f77..423da65 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -13,8 +13,8 @@
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -132,7 +132,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
default:
- assert(false && "Unknown fixup kind!");
+ llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_pcrel_adr_imm21:
if (SignedValue > 2097151 || SignedValue < -2097152)
report_fatal_error("fixup value out of range");
@@ -239,7 +239,7 @@ bool AArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
void AArch64AsmBackend::relaxInstruction(const MCInst &Inst,
MCInst &Res) const {
- assert(false && "AArch64AsmBackend::relaxInstruction() unimplemented");
+ llvm_unreachable("AArch64AsmBackend::relaxInstruction() unimplemented");
}
bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
@@ -317,42 +317,6 @@ public:
MachO::CPU_SUBTYPE_ARM64_ALL);
}
- bool doesSectionRequireSymbols(const MCSection &Section) const override {
- // Any section for which the linker breaks things into atoms needs to
- // preserve symbols, including assembler local symbols, to identify
- // those atoms. These sections are:
- // Sections of type:
- //
- // S_CSTRING_LITERALS (e.g. __cstring)
- // S_LITERAL_POINTERS (e.g. objc selector pointers)
- // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
- //
- // Sections named:
- //
- // __TEXT,__eh_frame
- // __TEXT,__ustring
- // __DATA,__cfstring
- // __DATA,__objc_classrefs
- // __DATA,__objc_catlist
- //
- // FIXME: It would be better if the compiler used actual linker local
- // symbols for each of these sections rather than preserving what
- // are ostensibly assembler local symbols.
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
- return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
- SMO.getType() == MachO::S_4BYTE_LITERALS ||
- SMO.getType() == MachO::S_8BYTE_LITERALS ||
- SMO.getType() == MachO::S_16BYTE_LITERALS ||
- SMO.getType() == MachO::S_LITERAL_POINTERS ||
- (SMO.getSegmentName() == "__TEXT" &&
- (SMO.getSectionName() == "__eh_frame" ||
- SMO.getSectionName() == "__ustring")) ||
- (SMO.getSegmentName() == "__DATA" &&
- (SMO.getSectionName() == "__cfstring" ||
- SMO.getSectionName() == "__objc_classrefs" ||
- SMO.getSectionName() == "__objc_catlist")));
- }
-
/// \brief Generate the compact unwind encoding from the CFI directives.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index e05191e..5ea49c3 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -78,7 +78,7 @@ unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC)
return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC)
- return ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ return ELF::R_AARCH64_TLSDESC_ADR_PAGE21;
llvm_unreachable("invalid symbol kind for ADRP relocation");
case AArch64::fixup_aarch64_pcrel_branch26:
return ELF::R_AARCH64_JUMP26;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 60e9c19..8dc6c30 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -177,7 +177,9 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection().first);
+ auto Sec = getCurrentSection().first;
+ assert(Sec && "need a section");
+ Symbol->setSection(*Sec);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 70b9329..f048474 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -37,6 +37,7 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant;
PrivateGlobalPrefix = "L";
+ PrivateLabelPrefix = "L";
SeparatorString = "%%";
CommentString = ";";
PointerSize = CalleeSaveStackSlotSize = 8;
@@ -79,6 +80,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
CommentString = "//";
PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
Code32Directive = ".code\t32";
Data16bitsDirective = "\t.hword\t";
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 5d03c21..9b88de7 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class Target;
@@ -27,7 +28,7 @@ struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
MCStreamer &Streamer) const override;
};
-struct AArch64MCAsmInfoELF : public MCAsmInfo {
+struct AArch64MCAsmInfoELF : public MCAsmInfoELF {
explicit AArch64MCAsmInfoELF(StringRef TT);
};
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index c306b11..4756a19 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -437,8 +437,7 @@ AArch64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
return 3;
}
- assert(false && "Invalid value for vector shift amount!");
- return 0;
+ llvm_unreachable("Invalid value for vector shift amount!");
}
uint32_t
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index e12a24b..0d9385d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -33,7 +34,7 @@ public:
: MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}
- void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
@@ -112,8 +113,36 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
}
}
+static bool canUseLocalRelocation(const MCSectionMachO &Section,
+ const MCSymbol &Symbol, unsigned Log2Size) {
+ // Debug info sections can use local relocations.
+ if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
+ return true;
+
+ // Otherwise, only pointer-sized relocations are supported.
+ if (Log2Size != 3)
+ return false;
+
+ // But only if they don't point to a few forbidden sections.
+ if (!Symbol.isInSection())
+ return true;
+ const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection());
+ if (RefSec.getType() == MachO::S_CSTRING_LITERALS)
+ return false;
+
+ if (RefSec.getSegmentName() == "__DATA" &&
+ RefSec.getSectionName() == "__cfstring")
+ return false;
+
+ if (RefSec.getSegmentName() == "__DATA" &&
+ RefSec.getSectionName() == "__objc_classrefs")
+ return false;
+
+ return true;
+}
+
void AArch64MachObjectWriter::RecordRelocation(
- MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
@@ -123,9 +152,9 @@ void AArch64MachObjectWriter::RecordRelocation(
unsigned Log2Size = 0;
int64_t Value = 0;
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = 0;
unsigned Kind = Fixup.getKind();
+ const MCSymbolData *RelSymbol = nullptr;
FixupOffset += Fixup.getOffset();
@@ -171,10 +200,8 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: Should this always be extern?
// SymbolNum of 0 indicates the absolute section.
Type = MachO::ARM64_RELOC_UNSIGNED;
- Index = 0;
if (IsPCRel) {
- IsExtern = 1;
Asm.getContext().FatalError(Fixup.getLoc(),
"PC relative absolute relocation!");
@@ -198,15 +225,12 @@ void AArch64MachObjectWriter::RecordRelocation(
Layout.getSymbolOffset(&B_SD) ==
Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
// SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
- Index = A_Base->getIndex();
- IsExtern = 1;
Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
IsPCRel = 1;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
return;
} else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
@@ -252,26 +276,31 @@ void AArch64MachObjectWriter::RecordRelocation(
? 0
: Writer->getSymbolAddress(B_Base, Layout));
- Index = A_Base->getIndex();
- IsExtern = 1;
Type = MachO::ARM64_RELOC_UNSIGNED;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
- Index = B_Base->getIndex();
- IsExtern = 1;
+ RelSymbol = B_Base;
Type = MachO::ARM64_RELOC_SUBTRACTOR;
} else { // A + constant
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
- const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(&SD);
const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
Fragment->getParent()->getSection());
+ bool CanUseLocalRelocation =
+ canUseLocalRelocation(Section, *Symbol, Log2Size);
+ if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) {
+ const MCSection &Sec = Symbol->getSection();
+ if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
+ Asm.addLocalUsedInReloc(*Symbol);
+ }
+
+ const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(&SD);
+
// If the symbol is a variable and we weren't able to get a Base for it
// (i.e., it's not in the symbol table associated with a section) resolve
// the relocation based on its expansion instead.
@@ -310,16 +339,13 @@ void AArch64MachObjectWriter::RecordRelocation(
// sections, and for pointer-sized relocations (.quad), we allow section
// relocations. It's code sections that run into trouble.
if (Base) {
- Index = Base->getIndex();
- IsExtern = 1;
+ RelSymbol = Base;
// Add the local offset, if needed.
if (Base != &SD)
Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
} else if (Symbol->isInSection()) {
- // Pointer-sized relocations can use a local relocation. Otherwise,
- // we have to be in a debug info section.
- if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
+ if (!CanUseLocalRelocation)
Asm.getContext().FatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + Symbol->getName() +
@@ -329,7 +355,6 @@ void AArch64MachObjectWriter::RecordRelocation(
const MCSectionData &SymSD =
Asm.getSectionData(SD.getSymbol().getSection());
Index = SymSD.getOrdinal() + 1;
- IsExtern = 0;
Value += Writer->getSymbolAddress(&SD, Layout);
if (IsPCRel)
@@ -362,16 +387,16 @@ void AArch64MachObjectWriter::RecordRelocation(
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
// Now set up the Addend relocation.
Type = MachO::ARM64_RELOC_ADDEND;
Index = Value;
+ RelSymbol = nullptr;
IsPCRel = 0;
Log2Size = 2;
- IsExtern = 0;
// Put zero into the instruction itself. The addend is in the relocation.
Value = 0;
@@ -383,9 +408,9 @@ void AArch64MachObjectWriter::RecordRelocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
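For reference when reading the r_word1 packing above, the MachO relocation_info bit layout (per <mach-o/reloc.h>) is:

    //   r_word1 bits  0..23  r_symbolnum   (Index << 0)
    //                 24     r_pcrel       (IsPCRel << 24)
    //                 25..26 r_length      (Log2Size << 25)
    //                 27     r_extern      no longer packed here; presumably
    //                                      set by addRelocation() when a
    //                                      RelSymbol is supplied
    //                 28..31 r_type        (Type << 28)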
MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,