about summary refs log tree commit diff stats
path: root/lib/Target/ARM
diff options
context:
space:
mode:
author Pirama Arumuga Nainar <pirama@google.com> 2015-04-10 21:22:52 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2015-04-10 21:23:04 +0000
commit 31195f0bdca6ee2a5e72d07edf13e1d81206d949 (patch)
tree 1b2c9792582e12f5af0b1512e3094425f0dc0df9 /lib/Target/ARM
parent c75239e6119d0f9a74c57099d91cbc9bde56bf33 (diff)
parent 4c5e43da7792f75567b693105cc53e3f1992ad98 (diff)
download external_llvm-31195f0bdca6ee2a5e72d07edf13e1d81206d949.zip
external_llvm-31195f0bdca6ee2a5e72d07edf13e1d81206d949.tar.gz
external_llvm-31195f0bdca6ee2a5e72d07edf13e1d81206d949.tar.bz2
Merge "Update aosp/master llvm for rebase to r233350"
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- lib/Target/ARM/A15SDOptimizer.cpp 12
-rw-r--r-- lib/Target/ARM/ARM.td 30
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.cpp 72
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.h 9
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp 34
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.cpp 83
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.h 27
-rw-r--r-- lib/Target/ARM/ARMFastISel.cpp 4
-rw-r--r-- lib/Target/ARM/ARMFrameLowering.cpp 5
-rw-r--r-- lib/Target/ARM/ARMISelDAGToDAG.cpp 9
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp 457
-rw-r--r-- lib/Target/ARM/ARMISelLowering.h 33
-rw-r--r-- lib/Target/ARM/ARMInstrFormats.td 7
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.cpp 7
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td 15
-rw-r--r-- lib/Target/ARM/ARMInstrNEON.td 215
-rw-r--r-- lib/Target/ARM/ARMInstrVFP.td 107
-rw-r--r-- lib/Target/ARM/ARMLoadStoreOptimizer.cpp 3
-rw-r--r-- lib/Target/ARM/ARMMachineFunctionInfo.h 6
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.cpp 4
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.h 2
-rw-r--r-- lib/Target/ARM/ARMSubtarget.cpp 2
-rw-r--r-- lib/Target/ARM/ARMSubtarget.h 8
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.cpp 66
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.h 4
-rw-r--r-- lib/Target/ARM/Android.mk 4
-rw-r--r-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp 9
-rw-r--r-- lib/Target/ARM/CMakeLists.txt 3
-rw-r--r-- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp 4
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMArchName.def 3
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp 29
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp 1
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp 2
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp 1
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCExpr.h 4
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp 183
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h 21
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp 72
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp 10
-rw-r--r-- lib/Target/ARM/MLxExpansionPass.cpp 3
-rw-r--r-- lib/Target/ARM/README-Thumb.txt 2
-rw-r--r-- lib/Target/ARM/Thumb1FrameLowering.cpp 22
-rw-r--r-- lib/Target/ARM/Thumb1FrameLowering.h 2
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.cpp 3
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.h 6
-rw-r--r-- lib/Target/ARM/Thumb2ITBlockPass.cpp 2
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.cpp 3
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.h 6
-rw-r--r-- lib/Target/ARM/Thumb2RegisterInfo.cpp 53
-rw-r--r-- lib/Target/ARM/Thumb2RegisterInfo.h 38
-rw-r--r-- lib/Target/ARM/Thumb2SizeReduction.cpp 4
-rw-r--r-- lib/Target/ARM/ThumbRegisterInfo.cpp (renamed from lib/Target/ARM/Thumb1RegisterInfo.cpp) 128
-rw-r--r-- lib/Target/ARM/ThumbRegisterInfo.h (renamed from lib/Target/ARM/Thumb1RegisterInfo.h) 14
53 files changed, 926 insertions, 927 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 387f1f6..7a1865c 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -27,12 +27,15 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <map>
@@ -678,8 +681,13 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
}
bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
- TRI = Fn.getSubtarget().getRegisterInfo();
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (!(STI.isCortexA15() && STI.hasNEON()))
+ return false;
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
MRI = &Fn.getRegInfo();
bool Modified = false;
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index f080c60..ce0aed9 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -167,9 +167,12 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
"Support ARM v6M instructions",
[HasV6Ops]>;
+def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true",
+ "Support ARM v6k instructions",
+ [HasV6Ops]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6MOps, FeatureThumb2]>;
+ [HasV6MOps, HasV6KOps, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops, FeaturePerfMon]>;
@@ -177,6 +180,9 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
[HasV7Ops, FeatureVirtualization,
FeatureMP]>;
+def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
+ "Support ARM v8.1a instructions",
+ [HasV8Ops, FeatureAClass, FeatureCRC]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -320,12 +326,6 @@ def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
// V6M Processors.
def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
@@ -337,6 +337,14 @@ def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
FeatureDB, FeatureMClass]>;
+// V6K Processors.
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>;
+def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
// V6T2 Processors.
def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
FeatureDSPThumb2]>;
@@ -449,6 +457,14 @@ def : ProcessorModel<"cyclone", SwiftModel,
FeatureDB,FeatureDSPThumb2,
FeatureHasRAS, FeatureZCZeroing]>;
+// V8.1 Processors
+def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2,
+ FeatureHWDiv, FeatureHWDivARM,
+ FeatureTrustZone, FeatureT2XtPk,
+ FeatureCrypto]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 2544a01..102def1 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -120,9 +120,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer.EndCOFFSymbolDef();
}
- // Have common code print out the function header with linkage info etc.
- EmitFunctionHeader();
-
// Emit the rest of the function body.
EmitFunctionBody();
@@ -438,65 +435,6 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
Triple TT(TM.getTargetTriple());
- if (TT.isOSBinFormatMachO()) {
- Reloc::Model RelocM = TM.getRelocationModel();
- if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
- // Declare all the text sections up front (before the DWARF sections
- // emitted by AsmPrinter::doInitialization) so the assembler will keep
- // them together at the beginning of the object file. This helps
- // avoid out-of-range branches that are due a fundamental limitation of
- // the way symbol offsets are encoded with the current Darwin ARM
- // relocations.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(
- getObjFileLowering());
-
- // Collect the set of sections our functions will go into.
- SetVector<const MCSection *, SmallVector<const MCSection *, 8>,
- SmallPtrSet<const MCSection *, 8> > TextSections;
- // Default text section comes first.
- TextSections.insert(TLOFMacho.getTextSection());
- // Now any user defined text sections from function attributes.
- for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F)
- if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
- TextSections.insert(TLOFMacho.SectionForGlobal(F, *Mang, TM));
- // Now the coalescable sections.
- TextSections.insert(TLOFMacho.getTextCoalSection());
- TextSections.insert(TLOFMacho.getConstTextCoalSection());
-
- // Emit the sections in the .s file header to fix the order.
- for (unsigned i = 0, e = TextSections.size(); i != e; ++i)
- OutStreamer.SwitchSection(TextSections[i]);
-
- if (RelocM == Reloc::DynamicNoPIC) {
- const MCSection *sect =
- OutContext.getMachOSection("__TEXT", "__symbol_stub4",
- MachO::S_SYMBOL_STUBS,
- 12, SectionKind::getText());
- OutStreamer.SwitchSection(sect);
- } else {
- const MCSection *sect =
- OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
- MachO::S_SYMBOL_STUBS,
- 16, SectionKind::getText());
- OutStreamer.SwitchSection(sect);
- }
- const MCSection *StaticInitSect =
- OutContext.getMachOSection("__TEXT", "__StaticInit",
- MachO::S_REGULAR |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
- OutStreamer.SwitchSection(StaticInitSect);
- }
-
- // Compiling with debug info should not affect the code
- // generation. Ensure the cstring section comes before the
- // optional __DWARF secion. Otherwise, PC-relative loads would
- // have to use different instruction sequences at "-g" in order to
- // reach global data in the same object file.
- OutStreamer.SwitchSection(getObjFileLowering().getCStringSection());
- }
-
// Use unified assembler syntax.
OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
@@ -669,7 +607,7 @@ void ARMAsmPrinter::emitAttributes() {
std::string CPUString = STI.getCPUString();
- if (CPUString != "generic") {
+ if (CPUString.find("generic") != 0) { //CPUString doesn't start with "generic"
// FIXME: remove krait check when GNU tools support krait cpu
if (STI.isKrait()) {
ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
@@ -723,7 +661,8 @@ void ARMAsmPrinter::emitAttributes() {
// Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
if (STI.hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowNeonARMv8);
+ STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a:
+ ARMBuildAttrs::AllowNeonARMv8);
} else {
if (STI.hasFPARMv8())
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
@@ -960,10 +899,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSymbol *MCSym;
if (ACPV->isLSDA()) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- OS << DL->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
- MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ MCSym = getCurExceptionSym();
} else if (ACPV->isBlockAddress()) {
const BlockAddress *BA =
cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 50cb954..e475ae4 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -103,13 +103,16 @@ private:
const MachineInstr *MI);
public:
- unsigned getISAEncoding(const Function *F) override {
+ unsigned getISAEncoding() override {
// ARM/Darwin adds ISA to the DWARF info for each function.
Triple TT(TM.getTargetTriple());
if (!TT.isOSBinFormatMachO())
return 0;
- const ARMSubtarget &STI = TM.getSubtarget<ARMSubtarget>(*F);
- return STI.isThumb() ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
+ bool isThumb = TT.getArch() == Triple::thumb ||
+ TT.getArch() == Triple::thumbeb ||
+ TT.getSubArch() == Triple::ARMSubArch_v7m ||
+ TT.getSubArch() == Triple::ARMSubArch_v6m;
+ return isThumb ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
private:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 29ee22e..7ee3cb0 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -4115,19 +4116,21 @@ enum ARMExeDomain {
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
- // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
- // if they are not predicated.
- if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
- return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
- // CortexA9 is particularly picky about mixing the two and wants these
- // converted.
- if (Subtarget.isCortexA9() && !isPredicated(MI) &&
- (MI->getOpcode() == ARM::VMOVRS ||
- MI->getOpcode() == ARM::VMOVSR ||
- MI->getOpcode() == ARM::VMOVS))
- return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
+ // If we don't have access to NEON instructions then we won't be able
+ // to swizzle anything to the NEON domain. Check to make sure.
+ if (Subtarget.hasNEON()) {
+ // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+ // if they are not predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+
+ // CortexA9 is particularly picky about mixing the two and wants these
+ // converted.
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+ }
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
@@ -4220,6 +4223,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ // Make sure we've got NEON instructions.
+ assert(Subtarget.hasNEON() && "VORRd requires NEON");
+
// Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
@@ -4507,7 +4513,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
}
bool ARMBaseInstrInfo::hasNOP() const {
- return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+ return (Subtarget.getFeatureBits() & ARM::HasV6KOps) != 0;
}
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 7574727..a8c7657 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -45,26 +45,27 @@
using namespace llvm;
-ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
+ARMBaseRegisterInfo::ARMBaseRegisterInfo()
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
+
+static unsigned getFramePointerReg(const ARMSubtarget &STI) {
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin() || STI.isThumb1Only())
- FramePtr = ARM::R7;
+ return ARM::R7;
else
- FramePtr = ARM::R11;
+ return ARM::R11;
} else if (STI.isTargetWindows())
- FramePtr = ARM::R11;
+ return ARM::R11;
else // ARM EABI
- FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
+ return STI.isThumb() ? ARM::R7 : ARM::R11;
}
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
const MCPhysReg *RegList =
STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
- if (!MF) return RegList;
-
const Function *F = MF->getFunction();
if (F->getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
@@ -89,8 +90,10 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return RegList;
}
-const uint32_t*
-ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
@@ -102,8 +105,10 @@ ARMBaseRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
-const uint32_t*
-ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i32 argument (which must also be the register used to return a
@@ -121,7 +126,8 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -130,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::FPSCR);
Reserved.set(ARM::APSR_NZCV);
if (TFI->hasFP(MF))
- Reserved.set(FramePtr);
+ Reserved.set(getFramePointerReg(STI));
if (hasBasePointer(MF))
Reserved.set(BasePtr);
// Some targets reserve R9.
@@ -150,9 +156,9 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-const TargetRegisterClass*
-ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
- const {
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &) const {
const TargetRegisterClass *Super = RC;
TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
@@ -187,7 +193,8 @@ ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
switch (RC->getID()) {
default:
@@ -283,29 +290,6 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
}
}
-bool
-ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- // CortexA9 has a Write-after-write hazard for NEON registers.
- if (!STI.isLikeA9())
- return false;
-
- switch (RC->getID()) {
- case ARM::DPRRegClassID:
- case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- case ARM::QPRRegClassID:
- case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- case ARM::SPRRegClassID:
- case ARM::SPR_8RegClassID:
- // Avoid reusing S, D, and Q registers.
- // Don't increase register pressure for QQ and QQQQ.
- return true;
- default:
- return false;
- }
-}
-
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -350,7 +334,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return false;
// Stack realignment requires a frame pointer. If we already started
// register allocation with frame pointer elimination, it is too late now.
- if (!MRI->canReserveReg(FramePtr))
+ if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>())))
return false;
// We may also need a base pointer if there are dynamic allocas or stack
// pointer adjustments around calls.
@@ -384,10 +368,11 @@ cannotEliminateFrame(const MachineFunction &MF) const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
if (TFI->hasFP(MF))
- return FramePtr;
+ return getFramePointerReg(STI);
return ARM::SP;
}
@@ -539,7 +524,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The incoming offset is relating to the SP at the start of the function,
// but when we access the local it'll be relative to the SP after local
// allocation, so adjust our SP-relative offset by that allocation size.
- Offset = -Offset;
Offset += MFI->getLocalFrameSize();
// Assume that we'll have at least some spill slots allocated.
// FIXME: This is a total SWAG number. We should run some statistics
@@ -552,9 +536,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
if (TFI->hasFP(MF) &&
- (MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s &&
!((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
- if (isFrameOffsetLegal(MI, FPOffset))
+ if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
}
// If we can reference via the stack pointer, try that.
@@ -562,7 +545,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
return false;
// The offset likely isn't legal, we want to allocate a virtual base register.
@@ -625,7 +608,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
(void)Done;
}
-bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const {
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
@@ -669,7 +652,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
NumBits = 8;
break;
case ARMII::AddrModeT1_s:
- NumBits = 8;
+ NumBits = (BaseReg == ARM::SP ? 8 : 5);
Scale = 4;
isSigned = false;
break;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 17027c2..fdc1ef9 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -21,10 +21,6 @@
#include "ARMGenRegisterInfo.inc"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseInstrInfo;
- class Type;
-
/// Register allocation hints.
namespace ARMRI {
enum {
@@ -82,27 +78,22 @@ static inline bool isCalleeSavedRegister(unsigned Reg,
class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
- const ARMSubtarget &STI;
-
- /// FramePtr - ARM physical register used as frame ptr.
- unsigned FramePtr;
-
/// BasePtr - ARM physical register used as a base ptr in complex stack
/// frames. I.e., when we need a 3rd base, not just SP and FP, due to
/// variable size stack objects.
unsigned BasePtr;
// Can be only subclassed.
- explicit ARMBaseRegisterInfo(const ARMSubtarget &STI);
+ explicit ARMBaseRegisterInfo();
// Return the opcode that implements 'Op', or 0 if no opcode
unsigned getOpcode(int Op) const;
public:
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
@@ -113,7 +104,8 @@ public:
///
/// Should return NULL in the case that the calling convention does not have
/// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -124,7 +116,8 @@ public:
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
const TargetRegisterClass *
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
@@ -138,8 +131,6 @@ public:
void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
- bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const override;
-
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
@@ -152,7 +143,7 @@ public:
int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
bool cannotEliminateFrame(const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 375d394..9c8d228 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -2265,7 +2265,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
@@ -2416,7 +2416,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 5a5bd57..830953b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -293,7 +293,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -742,8 +742,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 6ebf640..44cd1ef 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -257,7 +257,7 @@ private:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Form pairs of consecutive R, S, D, or Q registers.
@@ -3086,7 +3086,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// Store exclusive double return a i32 value which is the return status
// of the issued store.
- EVT ResTys[] = { MVT::i32, MVT::Other };
+ const EVT ResTys[] = {MVT::i32, MVT::Other};
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
// Place arguments in the right order.
@@ -3472,9 +3472,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
bool ARMDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ assert(ConstraintID == InlineAsm::Constraint_m &&
+ "unexpected asm memory constraint");
// Require the address to be in a register. That is safe for all ARM
// variants and it is hard to do anything much smarter without knowing
// how the operand is used.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 56290aa..3b1b8dd 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -23,6 +23,7 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -40,6 +41,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -47,6 +49,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;
@@ -568,14 +571,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
- MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
- MVT::v4i16, MVT::v2i16,
- MVT::v2i32};
- for (unsigned i = 0; i < 6; ++i) {
+ for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
+ MVT::v2i32}) {
for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal);
- setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal);
+ setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
}
}
}
@@ -614,6 +615,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
}
@@ -869,14 +876,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Various VFP goodness
if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
- // int <-> fp are custom expanded into bit_convert + ARMISD ops.
- if (Subtarget->hasVFP2()) {
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- }
-
// FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
@@ -1033,11 +1032,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::RBIT: return "ARMISD::RBIT";
- case ARMISD::FTOSI: return "ARMISD::FTOSI";
- case ARMISD::FTOUI: return "ARMISD::FTOUI";
- case ARMISD::SITOF: return "ARMISD::SITOF";
- case ARMISD::UITOF: return "ARMISD::UITOF";
-
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1164,6 +1158,20 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
return TargetLowering::getRegClassFor(VT);
}
+// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
+// source/dest is aligned and the copy size is large enough. We therefore want
+// to align such objects passed to memory intrinsics.
+bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const {
+ if (!isa<MemIntrinsic>(CI))
+ return false;
+ MinSize = 8;
+ // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
+ // cycle faster than 4-byte aligned LDM.
+ PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
+ return true;
+}
+
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
@@ -1815,16 +1823,16 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
+ Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
// Set isThisReturn to false if the calling convention is not one that
// allows 'returned' to be modeled in this way, so LowerCallResult does
// not try to pass 'this' straight through
isThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1857,60 +1865,61 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to insure
/// this.
-void
-ARMTargetLowering::HandleByVal(
- CCState *State, unsigned &size, unsigned Align) const {
- unsigned reg = State->AllocateReg(GPRArgRegs);
+void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+ unsigned Align) const {
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
- if (Subtarget->isAAPCS_ABI() && Align > 4) {
- unsigned AlignInRegs = Align / 4;
- unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
- for (unsigned i = 0; i < Waste; ++i)
- reg = State->AllocateReg(GPRArgRegs);
- }
- if (reg != 0) {
- unsigned excess = 4 * (ARM::R4 - reg);
-
- // Special case when NSAA != SP and parameter size greater than size of
- // all remained GPR regs. In that case we can't split parameter, we must
- // send it to stack. We also must set NCRN to R4, so waste all
- // remained registers.
- const unsigned NSAAOffset = State->getNextStackOffset();
- if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
- while (State->AllocateReg(GPRArgRegs))
- ;
- return;
- }
+ // Byval (as with any stack) slots are always at least 4 byte aligned.
+ Align = std::max(Align, 4U);
- // First register for byval parameter is the first register that wasn't
- // allocated before this method call, so it would be "reg".
- // If parameter is small enough to be saved in range [reg, r4), then
- // the end (first after last) register would be reg + param-size-in-regs,
- // else parameter would be splitted between registers and stack,
- // end register would be r4 in this case.
- unsigned ByValRegBegin = reg;
- unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
- State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
- // Note, first register is allocated in the beginning of function already,
- // allocate remained amount of registers we need.
- for (unsigned i = reg+1; i != ByValRegEnd; ++i)
- State->AllocateReg(GPRArgRegs);
- // A byval parameter that is split between registers and memory needs its
- // size truncated here.
- // In the case where the entire structure fits in registers, we set the
- // size in memory to zero.
- if (size < excess)
- size = 0;
- else
- size -= excess;
- }
+ unsigned Reg = State->AllocateReg(GPRArgRegs);
+ if (!Reg)
+ return;
+
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ Reg = State->AllocateReg(GPRArgRegs);
+
+ if (!Reg)
+ return;
+
+ unsigned Excess = 4 * (ARM::R4 - Reg);
+
+ // Special case when NSAA != SP and parameter size greater than size of
+ // all remained GPR regs. In that case we can't split parameter, we must
+ // send it to stack. We also must set NCRN to R4, so waste all
+ // remained registers.
+ const unsigned NSAAOffset = State->getNextStackOffset();
+ if (NSAAOffset != 0 && Size > Excess) {
+ while (State->AllocateReg(GPRArgRegs))
+ ;
+ return;
}
+
+ // First register for byval parameter is the first register that wasn't
+ // allocated before this method call, so it would be "reg".
+ // If parameter is small enough to be saved in range [reg, r4), then
+ // the end (first after last) register would be reg + param-size-in-regs,
+ // else parameter would be splitted between registers and stack,
+ // end register would be r4 in this case.
+ unsigned ByValRegBegin = Reg;
+ unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+ // Note, first register is allocated in the beginning of function already,
+ // allocate remained amount of registers we need.
+ for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs);
+ // A byval parameter that is split between registers and memory needs its
+ // size truncated here.
+ // In the case where the entire structure fits in registers, we set the
+ // size in memory to zero.
+ Size = std::max<int>(Size - Excess, 0);
}
+
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
@@ -1991,7 +2000,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
- // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
// emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
// the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
// support in the assembler and linker to be used. This would need to be
@@ -2819,50 +2828,6 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
-void
-ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned InRegsParamRecordIdx,
- unsigned ArgSize,
- unsigned &ArgRegsSize,
- unsigned &ArgRegsSaveSize)
- const {
- unsigned NumGPRs;
- if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
- unsigned RBegin, REnd;
- CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- NumGPRs = REnd - RBegin;
- } else {
- unsigned int firstUnalloced;
- firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs);
- NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
- }
-
- unsigned Align = Subtarget->getFrameLowering()->getStackAlignment();
- ArgRegsSize = NumGPRs * 4;
-
- // If parameter is split between stack and GPRs...
- if (NumGPRs && Align > 4 &&
- (ArgRegsSize < ArgSize ||
- InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
- // Add padding for part of param recovered from GPRs. For example,
- // if Align == 8, its last byte must be at address K*8 - 1.
- // We need to do it, since remained (stack) part of parameter has
- // stack alignment, and we need to "attach" "GPRs head" without gaps
- // to it:
- // Stack:
- // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
- // [ [padding] [GPRs head] ] [ Tail passed via stack ....
- //
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned Padding =
- OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
- ArgRegsSaveSize = ArgRegsSize + Padding;
- } else
- // We don't need to extend regs save size for byval parameters if they
- // are passed via GPRs only.
- ArgRegsSaveSize = ArgRegsSize;
-}
-
// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
@@ -2876,13 +2841,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- unsigned ArgSize,
- bool ForceMutable,
- unsigned ByValStoreOffset,
- unsigned TotalArgRegsSaveSize) const {
-
+ int ArgOffset,
+ unsigned ArgSize) const {
// Currently, two use-cases possible:
// Case #1. Non-var-args function, and we meet first byval parameter.
// Setup first unallocated register as first byval register;
@@ -2897,82 +2857,39 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex, lastRegToSaveIndex;
unsigned RBegin, REnd;
if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- firstRegToSaveIndex = RBegin - ARM::R0;
- lastRegToSaveIndex = REnd - ARM::R0;
} else {
- firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs);
- lastRegToSaveIndex = 4;
- }
-
- unsigned ArgRegsSize, ArgRegsSaveSize;
- computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
- ArgRegsSize, ArgRegsSaveSize);
-
- // Store any by-val regs to their spots on the stack so that they may be
- // loaded by deferencing the result of formal parameter pointer or va_next.
- // Note: once stack area for byval/varargs registers
- // was initialized, it can't be initialized again.
- if (ArgRegsSaveSize) {
- unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
-
- if (Padding) {
- assert(AFI->getStoredByValParamsPadding() == 0 &&
- "The only parameter may be padded.");
- AFI->setStoredByValParamsPadding(Padding);
- }
-
- int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
- Padding +
- ByValStoreOffset -
- (int64_t)TotalArgRegsSaveSize,
- false);
- SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (Padding) {
- MFI->CreateFixedObject(Padding,
- ArgOffset + ByValStoreOffset -
- (int64_t)ArgRegsSaveSize,
- false);
- }
-
- SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
- ++firstRegToSaveIndex, ++i) {
- const TargetRegisterClass *RC;
- if (AFI->isThumb1OnlyFunction())
- RC = &ARM::tGPRRegClass;
- else
- RC = &ARM::GPRRegClass;
+ unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
+ REnd = ARM::R4;
+ }
- unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
- }
+ if (REnd != RBegin)
+ ArgOffset = -4 * (ARM::R4 - RBegin);
- AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+ int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- return FrameIndex;
- } else {
- if (ArgSize == 0) {
- // We cannot allocate a zero-byte object for the first variadic argument,
- // so just make up a size.
- ArgSize = 4;
- }
- // This will point to the next argument passed via stack.
- return MFI->CreateFixedObject(
- ArgSize, ArgOffset, !ForceMutable);
+ SmallVector<SDValue, 4> MemOps;
+ const TargetRegisterClass *RC =
+ AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+
+ for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
+ unsigned VReg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
}
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ return FrameIndex;
}
// Setup stack frame, the va_list pointer will start from.
@@ -2990,11 +2907,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// the result of va_next.
// If there is no regs to be stored, just point address after last
// argument passed via stack.
- int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
- CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
- 0, TotalArgRegsSaveSize);
-
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+ CCInfo.getInRegsParamsCount(),
+ CCInfo.getNextStackOffset(), 4);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -3020,7 +2935,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
isVarArg));
SmallVector<SDValue, 16> ArgValues;
- int lastInsIndex = -1;
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
@@ -3030,50 +2944,40 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// We also increase this value in case of varargs function.
AFI->setArgRegsSaveSize(0);
- unsigned ByValStoreOffset = 0;
- unsigned TotalArgRegsSaveSize = 0;
- unsigned ArgRegsSaveSizeMaxAlign = 4;
-
// Calculate the amount of stack space that we need to allocate to store
// byval and variadic arguments that are passed in registers.
// We need to know this before we allocate the first byval or variadic
// argument, as they will be allocated a stack slot below the CFA (Canonical
// Frame Address, the stack pointer at entry to the function).
+ unsigned ArgRegBegin = ARM::R4;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
+ break;
+
CCValAssign &VA = ArgLocs[i];
- if (VA.isMemLoc()) {
- int index = VA.getValNo();
- if (index != lastInsIndex) {
- ISD::ArgFlagsTy Flags = Ins[index].Flags;
- if (Flags.isByVal()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
- Flags.getByValSize(),
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
-
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
- if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
- ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
- CCInfo.nextInRegsParam();
- }
- lastInsIndex = index;
- }
- }
+ unsigned Index = VA.getValNo();
+ ISD::ArgFlagsTy Flags = Ins[Index].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ assert(VA.isMemLoc() && "unexpected byval pointer in reg");
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
+ ArgRegBegin = std::min(ArgRegBegin, RBegin);
+
+ CCInfo.nextInRegsParam();
}
CCInfo.rewindByValRegsInfo();
- lastInsIndex = -1;
+
+ int lastInsIndex = -1;
if (isVarArg && MFI->hasVAStart()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+ unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ if (RegIdx != array_lengthof(GPRArgRegs))
+ ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
}
- // If the arg regs save area contains N-byte aligned values, the
- // bottom of it must be at least N-byte aligned.
- TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
- TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
+
+ unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
+ AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -3178,18 +3082,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
- ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
- int FrameIndex = StoreByValRegs(
- CCInfo, DAG, dl, Chain, CurOrigArg,
- CurByValIndex,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- Flags.getByValSize(),
- true /*force mutable frames*/,
- ByValStoreOffset,
- TotalArgRegsSaveSize);
- ByValStoreOffset += Flags.getByValSize();
- ByValStoreOffset = std::min(ByValStoreOffset, 16U);
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex, VA.getLocMemOffset(),
+ Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
CCInfo.nextInRegsParam();
} else {
@@ -3894,7 +3789,6 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
@@ -3907,20 +3801,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::FP_TO_SINT:
- Opc = ARMISD::FTOSI;
- break;
- case ISD::FP_TO_UINT:
- Opc = ARMISD::FTOUI;
- break;
- }
- Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+ return Op;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -3960,7 +3841,6 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
@@ -3973,21 +3853,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::SINT_TO_FP:
- Opc = ARMISD::SITOF;
- break;
- case ISD::UINT_TO_FP:
- Opc = ARMISD::UITOF;
- break;
- }
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(Opc, dl, VT, Op);
+ return Op;
}
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
@@ -7239,16 +7105,20 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (IsThumb2) {
+ if (Subtarget->useMovt(*MF)) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
- .addImm(LoopSize & 0xFFFF));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16),
+ Vtmp).addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(Vtmp).addImm(LoopSize >> 16));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16),
+ varEnd)
+ .addReg(Vtmp)
+ .addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -10076,6 +9946,28 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
}
+bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+ EVT VT = ExtVal.getValueType();
+
+ if (!isTypeLegal(VT))
+ return false;
+
+ // Don't create a loadext if we can fold the extension into a wide/long
+ // instruction.
+ // If there's more than one user instruction, the loadext is desirable no
+ // matter what. There can be two uses by the same instruction.
+ if (ExtVal->use_empty() ||
+ !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
+ return true;
+
+ SDNode *U = *ExtVal->use_begin();
+ if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
+ U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+ return false;
+
+ return true;
+}
+
bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
@@ -10289,9 +10181,9 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// Thumb2 and ARM modes can use cmn for negative immediates.
if (!Subtarget->isThumb())
- return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
// Thumb1 doesn't have cmn, and only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
@@ -10302,7 +10194,7 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Same encoding for add/sub, just flip the sign.
- int64_t AbsImm = llvm::abs64(Imm);
+ int64_t AbsImm = std::abs(Imm);
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(AbsImm) != -1;
if (Subtarget->isThumb2())
@@ -11198,9 +11090,12 @@ bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
-bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= (Subtarget->isMClass() ? 32U : 64U);
+ return (Size <= (Subtarget->isMClass() ? 32U : 64U))
+ ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
// This has so far only been implemented for MachO.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index ec1407d..dd4c954 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -65,11 +65,6 @@ namespace llvm {
RBIT, // ARM bitreverse instruction
- FTOSI, // FP to sint within a FP register.
- FTOUI, // FP to uint within a FP register.
- SITOF, // sint to FP within a FP register.
- UITOF, // uint to FP within a FP register.
-
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
@@ -283,6 +278,8 @@ namespace llvm {
using TargetLowering::isZExtFree;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
@@ -346,6 +343,12 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
const ARMSubtarget* getSubtarget() const {
return Subtarget;
}
@@ -360,6 +363,9 @@ namespace llvm {
return true;
}
+ bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const override;
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -404,7 +410,8 @@ namespace llvm {
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
bool useLoadStackGuardNode() const override;
@@ -525,12 +532,8 @@ namespace llvm {
SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- unsigned ArgSize,
- bool ForceMutable,
- unsigned ByValStoreOffset,
- unsigned TotalArgRegsSaveSize) const;
+ int ArgOffset,
+ unsigned ArgSize) const;
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
@@ -538,12 +541,6 @@ namespace llvm {
unsigned TotalArgRegsSaveSize,
bool ForceMutable = false) const;
- void computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned InRegsParamRecordIdx,
- unsigned ArgSize,
- unsigned &ArgRegsSize,
- unsigned &ArgRegsSaveSize) const;
-
SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 7d27cf3..e79608d 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -983,7 +983,12 @@ class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV6];
}
-
+class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2];
+}
+class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP];
+}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index bc617f0..7c004c9 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -30,8 +30,7 @@
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
@@ -146,6 +145,10 @@ namespace {
return false;
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ // Don't do this for Thumb1.
+ if (STI.isThumb1Only())
+ return false;
+
const TargetMachine &TM = MF.getTarget();
if (TM.getRelocationModel() != Reloc::PIC_)
return false;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 126c552..c3984ca 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -199,6 +199,9 @@ def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV6K : Predicate<"Subtarget->hasV6KOps()">,
+ AssemblerPredicate<"HasV6KOps", "armv6k">;
+def NoV6K : Predicate<"!Subtarget->hasV6KOps()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
AssemblerPredicate<"HasV7Ops", "armv7">;
def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
@@ -223,6 +226,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
+ AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
@@ -1835,11 +1840,11 @@ def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
let Inst{7-0} = imm;
}
-def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>;
def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 2a7b4b5..a6a07a8 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -2790,7 +2790,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode ShOp>
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -2826,7 +2826,7 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy,
- SDNode MulOp, SDNode ShOp>
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -3674,7 +3674,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VMulOpSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, string Dt, SDNode ShOp> {
+ string OpcodeStr, string Dt, SDPatternOperator ShOp> {
def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
@@ -3711,27 +3711,38 @@ multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
// Neon 3-argument intrinsics,
-// element sizes of 8, 16 and 32 bits:
-multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itinD, InstrItinClass itinQ,
+// element sizes of 16 and 32 bits:
+multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
- def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
- OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
- def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
- def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
- OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
- def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
- def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
+// Neon 3-argument intrinsics, element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp>
+ :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
+ itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+}
// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
@@ -4305,6 +4316,147 @@ defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+let Predicates = [HasNEON, HasV8_1a] in {
+ // v8.1a Neon Rounding Doubling Multiply-Op vector operations,
+ // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
+ // Returning High Half (Vd = vqadd(Vd, vqrdmulh(Vn, Vm)); D and Q forms)
+ defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
+ imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+ // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
+ // Returning High Half (Vd = vqsub(Vd, vqrdmulh(Vn, Vm)); D and Q forms)
+ defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+}
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlal", "s", null_frag>;
@@ -6158,6 +6310,21 @@ class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+class NVCVTIFPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode GPR:$a)),
+ (f32 (EXTRACT_SUBREG
+ (v2f32 (Inst
+ (INSERT_SUBREG
+ (v2f32 (IMPLICIT_DEF)),
+ (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
+ ssub_0))>;
+class NVCVTFIPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(i32 (OpNode SPR:$a)),
+ (i32 (EXTRACT_SUBREG
+ (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, ssub_0))),
+ ssub_0))>;
+
def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
@@ -6173,10 +6340,22 @@ def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
-def : N2VSPat<arm_ftosi, VCVTf2sd>;
-def : N2VSPat<arm_ftoui, VCVTf2ud>;
-def : N2VSPat<arm_sitof, VCVTs2fd>;
-def : N2VSPat<arm_uitof, VCVTu2fd>;
+def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
+def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
+def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
+def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
+
+// NEON doesn't have any f64 conversions, so provide patterns to make
+// sure the VFP conversions match when extracting from a vector.
+def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e0a9314..afff016 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -11,16 +11,10 @@
//
//===----------------------------------------------------------------------===//
-def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
-def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
-def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
-def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
-def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
-def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
@@ -633,7 +627,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
- [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>,
+ []>,
Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
@@ -641,7 +635,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
- [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>,
+ []>,
Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
@@ -649,7 +643,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
- [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>,
+ []>,
Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
@@ -664,7 +658,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
- [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>,
+ []>,
Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
@@ -676,6 +670,27 @@ multiclass vcvt_inst<string opc, bits<2> rm,
let Inst{8} = 1;
}
}
+
+ let Predicates = [HasFPARMv8] in {
+ def : Pat<(i32 (fp_to_sint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SS") SPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"US") SPR:$a),
+ GPR)>;
+ }
+ let Predicates = [HasFPARMv8, HasDPVFP] in {
+ def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SD") DPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"UD") DPR:$a),
+ GPR)>;
+ }
}
defm VCVTA : vcvt_inst<"a", 0b00, frnd>;
@@ -980,14 +995,22 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
- [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
+ []> {
let Inst{7} = 1; // s32
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (sint_to_fp GPR:$a)),
+ (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))),
+ (VSITOD (VLDRS addrmode5:$a))>;
+}
+
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd),(ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // s32
// Some single precision VFP instructions may be executed on both NEON and
@@ -995,17 +1018,31 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
+ (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))),
+ (VSITOS (VLDRS addrmode5:$a))>;
+
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
- [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
+ []> {
let Inst{7} = 0; // u32
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (uint_to_fp GPR:$a)),
+ (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))),
+ (VUITOD (VLDRS addrmode5:$a))>;
+}
+
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
+ []> {
let Inst{7} = 0; // u32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1013,6 +1050,12 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
+ (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))),
+ (VUITOS (VLDRS addrmode5:$a))>;
+
// FP -> Int:
class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1055,14 +1098,22 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
+ []> {
let Inst{7} = 1; // Z bit
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
+}
+
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1070,17 +1121,31 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
+ []> {
let Inst{7} = 1; // Z bit
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
+}
+
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1088,6 +1153,12 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
// FIXME: Verify encoding after integrated assembler is working.
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a8d0981..eca8e28 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -19,7 +19,7 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index ddfdb52..a68ab1b 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -149,11 +149,7 @@ public:
unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
- unsigned getArgRegsSaveSize(unsigned Align = 0) const {
- if (!Align)
- return ArgRegsSaveSize;
- return (ArgRegsSaveSize + Align - 1) & ~(Align - 1);
- }
+ unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
unsigned getReturnRegsCount() const { return ReturnRegsCount; }
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 80b4b48..e6e8cdf 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -16,6 +16,4 @@ using namespace llvm;
void ARMRegisterInfo::anchor() { }
-ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
+ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {}
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index b623173..e2e650e 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -23,7 +23,7 @@ class ARMSubtarget;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
virtual void anchor();
public:
- ARMRegisterInfo(const ARMSubtarget &STI);
+ ARMRegisterInfo();
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 89624dd..fbec9e6 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -129,6 +129,7 @@ void ARMSubtarget::initializeEnvironment() {
HasV5TEOps = false;
HasV6Ops = false;
HasV6MOps = false;
+ HasV6KOps = false;
HasV6T2Ops = false;
HasV7Ops = false;
HasV8Ops = false;
@@ -165,6 +166,7 @@ void ARMSubtarget::initializeEnvironment() {
HasTrustZone = false;
HasCrypto = false;
HasCRC = false;
+ HasV8_1a = false;
HasZeroCycleZeroing = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index f4deddf..f36cd5c 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -56,13 +56,14 @@ protected:
ARMProcClassEnum ARMProcClass;
/// HasV4TOps, HasV5TOps, HasV5TEOps,
- /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
+ /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether the target supports specific ARM ISA variants.
bool HasV4TOps;
bool HasV5TOps;
bool HasV5TEOps;
bool HasV6Ops;
bool HasV6MOps;
+ bool HasV6KOps;
bool HasV6T2Ops;
bool HasV7Ops;
bool HasV8Ops;
@@ -181,6 +182,9 @@ protected:
/// HasCRC - if true, processor supports CRC instructions
bool HasCRC;
+ /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions
+ bool HasV8_1a;
+
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing;
@@ -287,6 +291,7 @@ public:
bool hasV5TEOps() const { return HasV5TEOps; }
bool hasV6Ops() const { return HasV6Ops; }
bool hasV6MOps() const { return HasV6MOps; }
+ bool hasV6KOps() const { return HasV6KOps; }
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
@@ -311,6 +316,7 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ bool hasV8_1a() const { return HasV8_1a; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index a97a058..1bee1b0 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -37,6 +37,11 @@ EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
" to make use of cmpxchg flow-based information"),
cl::init(true));
+static cl::opt<bool>
+EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
+ cl::desc("Enable ARM load/store optimization pass"),
+ cl::init(true));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -105,9 +110,11 @@ computeTargetABI(const Triple &TT, StringRef CPU,
return TargetABI;
}
-static std::string computeDataLayout(const Triple &TT,
- ARMBaseTargetMachine::ARMABI ABI,
+static std::string computeDataLayout(StringRef TT, StringRef CPU,
+ const TargetOptions &Options,
bool isLittle) {
+ const Triple Triple(TT);
+ auto ABI = computeTargetABI(Triple, CPU, Options);
std::string Ret = "";
if (isLittle)
@@ -117,7 +124,7 @@ static std::string computeDataLayout(const Triple &TT,
// Big endian.
Ret += "E";
- Ret += DataLayout::getManglingComponent(TT);
+ Ret += DataLayout::getManglingComponent(Triple);
// Pointers are 32 bits and aligned to 32 bits.
Ret += "-p:32:32";
@@ -147,7 +154,7 @@ static std::string computeDataLayout(const Triple &TT,
// The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
// aligned everywhere else.
- if (TT.isOSNaCl())
+ if (Triple.isOSNaCl())
Ret += "-S128";
else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
Ret += "-S64";
@@ -164,9 +171,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
+ CPU, FS, Options, RM, CM, OL),
TargetABI(computeTargetABI(Triple(TT), CPU, Options)),
- DL(computeDataLayout(Triple(TT), TargetABI, isLittle)),
TLOF(createTLOF(Triple(getTargetTriple()))),
Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
@@ -325,7 +332,7 @@ void ARMPassConfig::addIRPasses() {
}
bool ARMPassConfig::addPreISel() {
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive)
// FIXME: This is using the thumb1 only constant value for
// maximal global offset for merging globals. We may want
// to look into using the old value for non-thumb1 code of
@@ -339,32 +346,30 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
- const ARMSubtarget *Subtarget = &getARMSubtarget();
- if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() &&
+ if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
TM->Options.EnableFastISel)
addPass(createARMGlobalBaseRegPass());
return false;
}
void ARMPassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createARMLoadStoreOptimizationPass(true));
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None) {
addPass(createMLxExpansionPass());
- // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
- // enabled when NEON is available.
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
- getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
- addPass(createA15SDOptimizerPass());
+
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));
+
+ if (!DisableA15SDOptimization)
+ addPass(createA15SDOptimizerPass());
}
}
void ARMPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None) {
- addPass(createARMLoadStoreOptimizationPass());
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass());
- if (getARMSubtarget().hasNEON())
- addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
// Expand some pseudo instructions into multiple instructions to allow
@@ -372,26 +377,21 @@ void ARMPassConfig::addPreSched2() {
addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only()) {
- // in v8, IfConversion depends on Thumb instruction widths
- if (getARMSubtarget().restrictIT() &&
- !getARMSubtarget().prefers32BitThumb())
- addPass(createThumb2SizeReductionPass());
+ // in v8, IfConversion depends on Thumb instruction widths
+ if (getARMSubtarget().restrictIT())
+ addPass(createThumb2SizeReductionPass());
+ if (!getARMSubtarget().isThumb1Only())
addPass(&IfConverterID);
- }
}
- if (getARMSubtarget().isThumb2())
- addPass(createThumb2ITBlockPass());
+ addPass(createThumb2ITBlockPass());
}
void ARMPassConfig::addPreEmitPass() {
- if (getARMSubtarget().isThumb2()) {
- if (!getARMSubtarget().prefers32BitThumb())
- addPass(createThumb2SizeReductionPass());
+ addPass(createThumb2SizeReductionPass());
- // Constant island pass work on unbundled instructions.
+ // The constant island pass works on unbundled instructions.
+ if (getARMSubtarget().isThumb2())
addPass(&UnpackMachineBundlesID);
- }
addPass(createARMOptimizeBarriersPass());
addPass(createARMConstantIslandPass());
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 7f6a1ee..20ca97b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -30,7 +30,6 @@ public:
} TargetABI;
protected:
- const DataLayout DL;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
ARMSubtarget Subtarget;
bool isLittle;
@@ -45,9 +44,8 @@ public:
bool isLittle);
~ARMBaseTargetMachine() override;
- const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
- const DataLayout *getDataLayout() const override { return &DL; }
bool isLittleEndian() const { return isLittle; }
/// \brief Get the TargetIRAnalysis for this target.
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk
index 55a5775..6694b53 100644
--- a/lib/Target/ARM/Android.mk
+++ b/lib/Target/ARM/Android.mk
@@ -4,6 +4,7 @@ arm_codegen_TBLGEN_TABLES := \
ARMGenRegisterInfo.inc \
ARMGenInstrInfo.inc \
ARMGenCodeEmitter.inc \
+ ARMGenCodeEmitter.inc \
ARMGenMCCodeEmitter.inc \
ARMGenMCPseudoLowering.inc \
ARMGenAsmWriter.inc \
@@ -41,10 +42,9 @@ arm_codegen_SRC_FILES := \
MLxExpansionPass.cpp \
Thumb1FrameLowering.cpp \
Thumb1InstrInfo.cpp \
- Thumb1RegisterInfo.cpp \
+ ThumbRegisterInfo.cpp \
Thumb2ITBlockPass.cpp \
Thumb2InstrInfo.cpp \
- Thumb2RegisterInfo.cpp \
Thumb2SizeReduction.cpp
# For the host
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 59461e8..2215efb 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -276,6 +276,9 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasD16() const {
return STI.getFeatureBits() & ARM::FeatureD16;
}
+ bool hasV8_1a() const {
+ return STI.getFeatureBits() & ARM::FeatureV8_1a;
+ }
void SwitchMode() {
uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
@@ -342,10 +345,10 @@ public:
};
- ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser,
+ ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), MII(MII), UC(_Parser) {
- MCAsmParserExtension::Initialize(_Parser);
+ : STI(STI), MII(MII), UC(Parser) {
+ MCAsmParserExtension::Initialize(Parser);
// Cache the MCRegisterInfo.
MRI = getContext().getRegisterInfo();
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 2530640..0b698197 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -40,10 +40,9 @@ add_llvm_target(ARMCodeGen
MLxExpansionPass.cpp
Thumb1FrameLowering.cpp
Thumb1InstrInfo.cpp
- Thumb1RegisterInfo.cpp
+ ThumbRegisterInfo.cpp
Thumb2ITBlockPass.cpp
Thumb2InstrInfo.cpp
- Thumb2RegisterInfo.cpp
Thumb2SizeReduction.cpp
)
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 16eea33..e15323d 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -637,12 +637,12 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
printRegName(O, MO1.getReg());
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
- unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
O << ", "
<< markup("<imm:")
<< "#"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
+ << ARM_AM::getAddrOpcStr(Op)
<< ImmOffs * 4
<< markup(">");
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.def b/lib/Target/ARM/MCTargetDesc/ARMArchName.def
index 9f007a0..96a0c1a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMArchName.def
+++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.def
@@ -30,6 +30,7 @@ ARM_ARCH_NAME("armv5t", ARMV5T, "5T", v5T)
ARM_ARCH_NAME("armv5te", ARMV5TE, "5TE", v5TE)
ARM_ARCH_NAME("armv6", ARMV6, "6", v6)
ARM_ARCH_NAME("armv6j", ARMV6J, "6J", v6)
+ARM_ARCH_NAME("armv6k", ARMV6K, "6K", v6K)
ARM_ARCH_NAME("armv6t2", ARMV6T2, "6T2", v6T2)
ARM_ARCH_NAME("armv6z", ARMV6Z, "6Z", v6KZ)
ARM_ARCH_NAME("armv6zk", ARMV6ZK, "6ZK", v6KZ)
@@ -43,6 +44,8 @@ ARM_ARCH_NAME("armv7-m", ARMV7M, "7-M", v7)
ARM_ARCH_ALIAS("armv7m", ARMV7M)
ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8)
ARM_ARCH_ALIAS("armv8a", ARMV8A)
+ARM_ARCH_NAME("armv8.1-a", ARMV8_1A, "8.1-A", v8)
+ARM_ARCH_ALIAS("armv8.1a", ARMV8_1A)
ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE)
ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE)
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 2b65520..9648ffa 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -783,6 +783,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
break;
+ case ARM::ARMV6K:
case ARM::ARMV6Z:
case ARM::ARMV6ZK:
setAttributeItem(ARM_ISA_use, Allowed, false);
@@ -816,6 +817,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
break;
case ARM::ARMV8A:
+ case ARM::ARMV8_1A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -913,9 +915,8 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPARMv8A,
/* OverwriteExisting= */ false);
- setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowNeonARMv8,
- /* OverwriteExisting= */ false);
+ // 'Advanced_SIMD_arch' must be emitted not here, but within
+ // ARMAsmPrinter::emitAttributes(), depending on hasV8Ops() and hasV8_1a()
break;
case ARM::SOFTVFP:
@@ -1362,25 +1363,29 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset,
namespace llvm {
-MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm);
- return S;
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new ARMTargetAsmStreamer(S, OS, *InstPrint, isVerboseAsm);
}
MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
return new ARMTargetStreamer(S);
}
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new ARMTargetELFStreamer(S);
+ return new ARMTargetStreamer(S);
+}
+
MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool IsThumb) {
ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
- new ARMTargetELFStreamer(*S);
// FIXME: This should eventually end up somewhere else where more
// intelligent flag decisions can be made. For now we are just maintaining
// the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 66a1618..caa8736 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -59,6 +59,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
// Exceptions handling
switch (TheTriple.getOS()) {
+ case Triple::Bitrig:
case Triple::NetBSD:
ExceptionsType = ExceptionHandling::DwarfCFI;
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index efbebd3..e48cabb 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -441,14 +441,12 @@ public:
MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, true);
}
MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, false);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 68d32b2..5b90de3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -10,6 +10,7 @@
#include "ARMMCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
#define DEBUG_TYPE "armmcexpr"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 06bf6c9..2be98d2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -26,8 +26,8 @@ private:
const VariantKind Kind;
const MCExpr *Expr;
- explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
- : Kind(_Kind), Expr(_Expr) {}
+ explicit ARMMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
public:
/// @name Construction
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 8c19785..7ff7f9a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -153,6 +153,17 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
// Use CPU to figure out the exact features
ARMArchFeature = "+v8";
break;
+ case Triple::ARMSubArch_v8_1a:
+ if (NoCPU)
+ // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
+ // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
+ // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a
+ ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
+ "+trustzone,+t2xtpk,+crypto,+crc";
+ else
+ // Use CPU to figure out the exact features
+ ARMArchFeature = "+v8.1a";
+ break;
case Triple::ARMSubArch_v7m:
isThumb = true;
if (NoCPU)
@@ -195,6 +206,9 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
case Triple::ARMSubArch_v6t2:
ARMArchFeature = "+v6t2";
break;
+ case Triple::ARMSubArch_v6k:
+ ARMArchFeature = "+v6k";
+ break;
case Triple::ARMSubArch_v6m:
isThumb = true;
if (NoCPU)
@@ -295,27 +309,18 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
+ T.getArch() == Triple::thumb);
+}
- switch (TheTriple.getObjectFormat()) {
- default: llvm_unreachable("unsupported object format");
- case Triple::MachO: {
- MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false);
- new ARMTargetStreamer(*S);
- return S;
- }
- case Triple::COFF:
- assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS);
- case Triple::ELF:
- return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
- TheTriple.getArch() == Triple::thumb);
- }
+static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool DWARFMustBeAtTheEnd) {
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
@@ -379,61 +384,53 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeARMTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheARMLETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn Y(TheARMBETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn A(TheThumbLETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn B(TheThumbBETarget, createARMMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget,
- createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget,
- createARMMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARMBETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbLETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbBETarget, createARMMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheARMLETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARMBETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbLETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbBETarget, createARMMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheARMLETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARMBETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbLETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbBETarget,
- ARM_MC::createARMMCSubtargetInfo);
-
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(TheARMLETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheARMBETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheThumbLETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheThumbBETarget,
- createARMMCInstrAnalysis);
+ for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget,
+ &TheThumbBETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createARMMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createARMMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createARMMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T,
+ ARM_MC::createARMMCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
+
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createARMObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createARMTargetAsmStreamer);
+
+ // Register the null TargetStreamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T, createARMNullTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createARMMCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo);
+ }
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheARMLETarget,
- createARMLEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARMBETarget,
- createARMBEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheThumbLETarget,
- createARMLEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheThumbBETarget,
- createARMBEMCCodeEmitter);
+ for (Target *T : {&TheARMLETarget, &TheThumbLETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
+ for (Target *T : {&TheARMBETarget, &TheThumbBETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend);
@@ -442,44 +439,4 @@ extern "C" void LLVMInitializeARMTargetMC() {
createThumbLEAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget,
createThumbBEAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheARMLETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARMBETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheThumbLETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheThumbBETarget, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheARMLETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheARMBETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer);
-
- // Register the null TargetStreamer.
- TargetRegistry::RegisterNullTargetStreamer(TheARMLETarget,
- createARMNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheARMBETarget,
- createARMNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheThumbLETarget,
- createARMNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheThumbBETarget,
- createARMNullTargetStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbLETarget,
- createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbBETarget,
- createARMMCInstPrinter);
-
- // Register the MC relocation info.
- TargetRegistry::RegisterMCRelocationInfo(TheARMLETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheARMBETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheThumbLETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheThumbBETarget,
- createARMMCRelocationInfo);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index c17e959..7e9ba66 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -32,6 +32,7 @@ class MCRelocationInfo;
class MCTargetStreamer;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
extern Target TheARMLETarget, TheThumbLETarget;
@@ -47,21 +48,20 @@ namespace ARM_MC {
StringRef FS);
}
-MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
-
MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S);
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
@@ -80,10 +80,11 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI
MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which
-/// will generate a PE/COFF object file.
+// Construct a PE/COFF machine code streamer which will generate a PE/COFF
+// object file.
MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &Emitter, raw_ostream &OS);
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll);
/// createARMELFObjectWriter - Construct an ELF Mach-O object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 593fe34..173cc93 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -72,14 +72,10 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
// opcode when r4 is not in .save directive.
// Compute the consecutive registers from r4 to r11.
- uint32_t Range = 0;
- uint32_t Mask = (1u << 4);
- for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
- if ((RegSave & Bit) == 0u)
- break;
- ++Range;
- Mask |= Bit;
- }
+ uint32_t Mask = RegSave & 0xff0u;
+ uint32_t Range = countTrailingOnes(Mask >> 5); // Exclude r4.
+ // Mask off non-consecutive registers. Keep r4.
+ Mask &= ~(0xffffffe0u << Range);
// Emit this opcode when the mask covers every registers.
uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
@@ -105,50 +101,24 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
/// Emit unwind opcodes for .vsave directives
void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
- size_t i = 32;
-
- while (i > 16) {
- uint32_t Bit = 1u << (i - 1);
- if ((VFPRegSave & Bit) == 0u) {
- --i;
- continue;
- }
-
- uint32_t Range = 0;
-
- --i;
- Bit >>= 1;
-
- while (i > 16 && (VFPRegSave & Bit)) {
- --i;
- ++Range;
- Bit >>= 1;
+ // We only have 4 bits to save the offset in the opcode so look at the lower
+ // and upper 16 bits separately.
+ for (uint32_t Regs : {VFPRegSave & 0xffff0000u, VFPRegSave & 0x0000ffffu}) {
+ while (Regs) {
+ // Now look for a run of set bits. Remember the MSB and LSB of the run.
+ auto RangeMSB = 32 - countLeadingZeros(Regs);
+ auto RangeLen = countLeadingOnes(Regs << (32 - RangeMSB));
+ auto RangeLSB = RangeMSB - RangeLen;
+
+ int Opcode = RangeLSB >= 16
+ ? ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16
+ : ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD;
+
+ EmitInt16(Opcode | ((RangeLSB % 16) << 4) | (RangeLen - 1));
+
+ // Zero out bits we're done with.
+ Regs &= ~(-1u << RangeLSB);
}
-
- EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 |
- ((i - 16) << 4) | Range);
- }
-
- while (i > 0) {
- uint32_t Bit = 1u << (i - 1);
- if ((VFPRegSave & Bit) == 0u) {
- --i;
- continue;
- }
-
- uint32_t Range = 0;
-
- --i;
- Bit >>= 1;
-
- while (i > 0 && (VFPRegSave & Bit)) {
- --i;
- ++Range;
- Bit >>= 1;
- }
-
- EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) |
- Range);
}
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b344ced..dc707dc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -37,10 +37,10 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
}
}
-namespace llvm {
-MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &Emitter, raw_ostream &OS) {
- return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS);
-}
+MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
}
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 51e519d..ed2deea 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -382,6 +382,9 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
+ // Only run this for CortexA9.
+ if (!STI->isCortexA9())
+ return false;
isLikeA9 = STI->isLikeA9() || STI->isSwift();
isSwift = STI->isSwift();
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index f4d9be3..2d031d0 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -232,7 +232,7 @@ Make use of hi register variants of cmp: tCMPhir / tCMPZhir.
//===---------------------------------------------------------------------===//
Thumb1 immediate field sometimes keep pre-scaled values. See
-Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent from ARM and
+ThumbRegisterInfo::eliminateFrameIndex. This is inconsistent from ARM and
Thumb2.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 7dcc64e..c496cd7 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -41,7 +41,7 @@ static void
emitSPUpdate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb1RegisterInfo &MRI,
+ const ThumbRegisterInfo &MRI,
int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
MRI, MIFlags);
@@ -53,8 +53,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -89,13 +89,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
@@ -328,17 +327,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index b785b28..cf93203 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -16,7 +16,7 @@
#include "ARMFrameLowering.h"
#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index c24f740..29aaa15 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -22,8 +22,7 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 9fba760..f3f493d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -15,13 +15,13 @@
#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
#include "ARMBaseInstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class Thumb1InstrInfo : public ARMBaseInstrInfo {
- Thumb1RegisterInfo RI;
+ ThumbRegisterInfo RI;
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
@@ -36,7 +36,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const Thumb1RegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b657f2d..7bb2265 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -255,6 +255,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ if (!STI.isThumb2())
+ return false;
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
TRI = STI.getRegisterInfo();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 62c3752..26ca7e9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -30,8 +30,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 46a1f6d..916ab06 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -15,14 +15,14 @@
#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
#include "ARMBaseInstrInfo.h"
-#include "Thumb2RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ScheduleHazardRecognizer;
class Thumb2InstrInfo : public ARMBaseInstrInfo {
- Thumb2RegisterInfo RI;
+ ThumbRegisterInfo RI;
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
@@ -60,7 +60,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI,
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
deleted file mode 100644
index 0d5d85a..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Thumb2RegisterInfo.h"
-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
-
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned MIFlags) const {
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineConstantPool *ConstantPool = MF.getConstantPool();
- const Constant *C = ConstantInt::get(
- Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
-
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
- .setMIFlags(MIFlags);
-}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
deleted file mode 100644
index 1dd94cc..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-
-#include "ARMBaseRegisterInfo.h"
-
-namespace llvm {
-
-class ARMSubtarget;
-
-struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
-public:
- Thumb2RegisterInfo(const ARMSubtarget &STI);
-
- /// emitLoadConstPool - Emits a load from constpool to materialize the
- /// specified immediate.
- void
- emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
- unsigned MIFlags = MachineInstr::NoFlags) const override;
-};
-}
-
-#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 2ee908b..e967e53 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -1002,6 +1003,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ if (STI->isThumb1Only() || STI->prefers32BitThumb())
+ return false;
+
TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
// Optimizing / minimizing size?
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index 5e2cbdc..b5f9d7e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
+//===-- ThumbRegisterInfo.cpp - Thumb-1 Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
@@ -38,39 +38,35 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
+ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {}
+
+const TargetRegisterClass *
+ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
-const TargetRegisterClass*
-Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
- const {
if (ARM::tGPRRegClass.hasSubClassEq(RC))
return &ARM::tGPRRegClass;
- return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
const TargetRegisterClass *
-Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
+ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getPointerRegClass(MF, Kind);
return &ARM::tGPRRegClass;
}
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned MIFlags) const {
- assert((isARMLowRegister(DestReg) ||
- isVirtualRegister(DestReg)) &&
- "Thumb1 does not have ldr to high register");
-
+static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
@@ -83,6 +79,42 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.setMIFlags(MIFlags);
}
+static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ThumbRegisterInfo::emitLoadConstPool(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (STI.isThumb1Only()) {
+ assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) &&
+ "Thumb1 does not have ldr to high register");
+ return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+ }
+ return emitThumb2LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+}
/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
@@ -317,12 +349,14 @@ static unsigned convertToNonSPOpcode(unsigned Opcode) {
return Opcode;
}
-bool Thumb1RegisterInfo::
-rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const ARMBaseInstrInfo &TII) const {
+bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
+ unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
+ assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
+ "This isn't needed for thumb2!");
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned Opcode = MI.getOpcode();
@@ -386,8 +420,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
return Offset == 0;
}
-void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::resolveFrameIndex(MI, BaseReg, Offset);
+
const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -403,12 +442,15 @@ void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
/// saveScavengerRegister - Spill the register so it can be used by the
/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
+bool ThumbRegisterInfo::saveScavengerRegister(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
+ unsigned Reg) const {
+
+ const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);
+
// Thumb1 can't use the emergency spill slot on the stack because
// ldr/str immediate offsets must be positive, and if we're referencing
// off the frame pointer (if, for example, there are alloca() calls in
@@ -452,14 +494,18 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
return true;
}
-void
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
- unsigned VReg = 0;
+void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::eliminateFrameIndex(II, SPAdj, FIOperandNum,
+ RS);
+
+ unsigned VReg = 0;
const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index 5feaf52..23aaff3 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//===- ThumbRegisterInfo.h - Thumb Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo
-// class.
+// This file contains the Thumb implementation of the TargetRegisterInfo
+// class. With the exception of emitLoadConstPool Thumb2 tracks
+// ARMBaseRegisterInfo, Thumb1 overloads the functions below.
//
//===----------------------------------------------------------------------===//
@@ -22,12 +23,13 @@ namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
-struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+struct ThumbRegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb1RegisterInfo(const ARMSubtarget &STI);
+ ThumbRegisterInfo();
const TargetRegisterClass *
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,