about | summary | refs | log | tree | commit | diff | stats
path: root/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- lib/Target/ARM/ARM.h 4
-rw-r--r-- lib/Target/ARM/ARM.td 36
-rw-r--r-- lib/Target/ARM/ARMArchExtName.def 30
-rw-r--r-- lib/Target/ARM/ARMArchExtName.h 26
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.cpp 278
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.h 22
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp 51
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.h 4
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.cpp 38
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.h 2
-rw-r--r-- lib/Target/ARM/ARMCallingConv.h 161
-rw-r--r-- lib/Target/ARM/ARMCallingConv.td 2
-rw-r--r-- lib/Target/ARM/ARMConstantIslandPass.cpp 14
-rw-r--r-- lib/Target/ARM/ARMExpandPseudoInsts.cpp 18
-rw-r--r-- lib/Target/ARM/ARMFastISel.cpp 57
-rw-r--r-- lib/Target/ARM/ARMFrameLowering.cpp 255
-rw-r--r-- lib/Target/ARM/ARMFrameLowering.h 2
-rw-r--r-- lib/Target/ARM/ARMHazardRecognizer.cpp 5
-rw-r--r-- lib/Target/ARM/ARMISelDAGToDAG.cpp 37
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp 711
-rw-r--r-- lib/Target/ARM/ARMISelLowering.h 19
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.cpp 26
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td 265
-rw-r--r-- lib/Target/ARM/ARMInstrNEON.td 20
-rw-r--r-- lib/Target/ARM/ARMInstrThumb.td 25
-rw-r--r-- lib/Target/ARM/ARMInstrThumb2.td 106
-rw-r--r-- lib/Target/ARM/ARMInstrVFP.td 18
-rw-r--r-- lib/Target/ARM/ARMLoadStoreOptimizer.cpp 68
-rw-r--r-- lib/Target/ARM/ARMMCInstLower.cpp 36
-rw-r--r-- lib/Target/ARM/ARMMachineFunctionInfo.cpp 4
-rw-r--r-- lib/Target/ARM/ARMMachineFunctionInfo.h 2
-rw-r--r-- lib/Target/ARM/ARMOptimizeBarriersPass.cpp 2
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.td 10
-rw-r--r-- lib/Target/ARM/ARMSelectionDAGInfo.cpp 9
-rw-r--r-- lib/Target/ARM/ARMSubtarget.cpp 122
-rw-r--r-- lib/Target/ARM/ARMSubtarget.h 36
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.cpp 155
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.h 14
-rw-r--r-- lib/Target/ARM/ARMTargetObjectFile.cpp 10
-rw-r--r-- lib/Target/ARM/ARMTargetTransformInfo.cpp 215
-rw-r--r-- lib/Target/ARM/ARMTargetTransformInfo.h 134
-rw-r--r-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp 510
-rw-r--r-- lib/Target/ARM/Disassembler/ARMDisassembler.cpp 37
-rw-r--r-- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp 65
-rw-r--r-- lib/Target/ARM/InstPrinter/ARMInstPrinter.h 1
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp 21
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp 61
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp 4
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h 3
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp 26
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp 72
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h 3
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp 37
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp 1
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp 14
-rw-r--r-- lib/Target/ARM/MLxExpansionPass.cpp 2
-rw-r--r-- lib/Target/ARM/Thumb1FrameLowering.cpp 54
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.cpp 2
-rw-r--r-- lib/Target/ARM/Thumb1RegisterInfo.cpp 23
-rw-r--r-- lib/Target/ARM/Thumb2ITBlockPass.cpp 10
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.cpp 9
-rw-r--r-- lib/Target/ARM/Thumb2SizeReduction.cpp 13
62 files changed, 2557 insertions, 1460 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 02db53a..d3cc068 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -34,16 +34,12 @@ FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalBaseRegPass();
-FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createARMOptimizeBarriersPass();
FunctionPass *createThumb2SizeReductionPass();
-/// \brief Creates an ARM-specific Target Transformation Info pass.
-ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
-
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 80b976b..f080c60 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -147,6 +147,11 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
"NaCl trap">;
+// RenderScript-specific support for 64-bit long types on all targets
+def FeatureLong64 : SubtargetFeature<"long64", "UseLong64",
+ "true",
+ "long type is forced to be 64-bit">;
+
// ARM ISAs.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
@@ -270,17 +275,6 @@ def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
FeatureHWDivARM]>;
-def FeatureAPCS : SubtargetFeature<"apcs", "TargetABI", "ARM_ABI_APCS",
- "Use the APCS ABI">;
-
-def FeatureAAPCS : SubtargetFeature<"aapcs", "TargetABI", "ARM_ABI_AAPCS",
- "Use the AAPCS ABI">;
-
-// RenderScript-specific support for 64-bit long types on all targets
-def FeatureLong64 : SubtargetFeature<"long64", "UseLong64",
- "true",
- "long type is forced to be 64-bit">;
-
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
@@ -336,6 +330,12 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
// V6M Processors.
def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
FeatureDB, FeatureMClass]>;
+def : Processor<"cortex-m0plus", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
+ FeatureDB, FeatureMClass]>;
+def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
+ FeatureDB, FeatureMClass]>;
+def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
+ FeatureDB, FeatureMClass]>;
// V6T2 Processors.
def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
@@ -395,10 +395,20 @@ def : ProcessorModel<"cortex-r5", CortexA8Model,
FeatureHasRAS, FeatureVFPOnlySP,
FeatureD16, FeatureRClass]>;
+// FIXME: R7 currently has the same ProcessorModel as A8 and is modelled as R5.
+def : ProcessorModel<"cortex-r7", CortexA8Model,
+ [ProcR5, HasV7Ops, FeatureDB,
+ FeatureVFP3, FeatureDSPThumb2,
+ FeatureHasRAS, FeatureVFPOnlySP,
+ FeatureD16, FeatureMP, FeatureRClass]>;
+
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureMClass]>;
+def : ProcNoItin<"sc300", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureMClass]>;
// V7EM Processors.
def : ProcNoItin<"cortex-m4", [HasV7Ops,
@@ -427,6 +437,10 @@ def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass,
def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
FeatureDB, FeatureFPARMv8,
FeatureNEON, FeatureDSPThumb2]>;
+// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
+def : ProcNoItin<"cortex-a72", [ProcA57, HasV8Ops, FeatureAClass,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2]>;
// Cyclone is very similar to swift
def : ProcessorModel<"cyclone", SwiftModel,
diff --git a/lib/Target/ARM/ARMArchExtName.def b/lib/Target/ARM/ARMArchExtName.def
new file mode 100644
index 0000000..d6da50c
--- /dev/null
+++ b/lib/Target/ARM/ARMArchExtName.def
@@ -0,0 +1,30 @@
+//===-- ARMArchExtName.def - List of the ARM Extension names ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the list of the supported ARM Architecture Extension
+// names. These can be used to enable the extension through the .arch_extension
+// directive.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef ARM_ARCHEXT_NAME
+#error "You must define ARM_ARCHEXT_NAME(NAME, ID) before including ARMArchExtName.def"
+#endif
+
+ARM_ARCHEXT_NAME("crc", CRC)
+ARM_ARCHEXT_NAME("crypto", CRYPTO)
+ARM_ARCHEXT_NAME("fp", FP)
+ARM_ARCHEXT_NAME("idiv", HWDIV)
+ARM_ARCHEXT_NAME("mp", MP)
+ARM_ARCHEXT_NAME("sec", SEC)
+ARM_ARCHEXT_NAME("virt", VIRT)
+
+#undef ARM_ARCHEXT_NAME
diff --git a/lib/Target/ARM/ARMArchExtName.h b/lib/Target/ARM/ARMArchExtName.h
new file mode 100644
index 0000000..bc1157a
--- /dev/null
+++ b/lib/Target/ARM/ARMArchExtName.h
@@ -0,0 +1,26 @@
+//===-- ARMArchExtName.h - List of the ARM Extension names ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMARCHEXTNAME_H
+#define LLVM_LIB_TARGET_ARM_ARMARCHEXTNAME_H
+
+namespace llvm {
+namespace ARM {
+
+enum ArchExtKind {
+ INVALID_ARCHEXT = 0
+
+#define ARM_ARCHEXT_NAME(NAME, ID) , ID
+#include "ARMArchExtName.def"
+};
+
+} // namespace ARM
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 695fd4d..2544a01 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMFPUName.h"
+#include "ARMArchExtName.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
@@ -57,6 +58,11 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr),
+ InConstantPool(false) {}
+
void ARMAsmPrinter::EmitFunctionBodyEnd() {
// Make sure to terminate any constant pools that were at the end
// of the function.
@@ -76,8 +82,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
}
void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
- uint64_t Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
@@ -99,6 +104,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
AFI = MF.getInfo<ARMFunctionInfo>();
MCP = MF.getConstantPool();
+ Subtarget = &MF.getSubtarget<ARMSubtarget>();
SetupMachineFunction(MF);
@@ -120,6 +126,23 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
+ // If we need V4T thumb mode Register Indirect Jump pads, emit them.
+ // These are created per function, rather than per TU, since it's
+ // relatively easy to exceed the thumb branch range within a TU.
+ if (! ThumbIndirectPads.empty()) {
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ EmitAlignment(1);
+ for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) {
+ OutStreamer.EmitLabel(ThumbIndirectPads[i].second);
+ EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX)
+ .addReg(ThumbIndirectPads[i].first)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ }
+ ThumbIndirectPads.clear();
+ }
+
// We didn't modify anything.
return false;
}
@@ -183,7 +206,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
MCSymbol *ARMAsmPrinter::
GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid << '_' << uid2;
@@ -192,7 +215,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
@@ -414,7 +437,8 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
}
void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (Subtarget->isTargetMachO()) {
+ Triple TT(TM.getTargetTriple());
+ if (TT.isOSBinFormatMachO()) {
Reloc::Model RelocM = TM.getRelocationModel();
if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
// Declare all the text sections up front (before the DWARF sections
@@ -477,10 +501,17 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
// Emit ARM Build Attributes
- if (Subtarget->isTargetELF())
+ if (TT.isOSBinFormatELF())
emitAttributes();
- if (!M.getModuleInlineAsm().empty() && Subtarget->isThumb())
+ // Use the triple's architecture and subarchitecture to determine
+ // if we're thumb for the purposes of the top level code16 assembler
+ // flag.
+ bool isThumb = TT.getArch() == Triple::thumb ||
+ TT.getArch() == Triple::thumbeb ||
+ TT.getSubArch() == Triple::ARMSubArch_v7m ||
+ TT.getSubArch() == Triple::ARMSubArch_v6m;
+ if (!M.getModuleInlineAsm().empty() && isThumb)
OutStreamer.EmitAssemblerFlag(MCAF_Code16);
}
@@ -509,7 +540,8 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetMachO()) {
+ Triple TT(TM.getTargetTriple());
+ if (TT.isOSBinFormatMachO()) {
// All darwin targets use mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
@@ -552,7 +584,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
}
// Emit a .data.rel section containing any stubs that were created.
- if (Subtarget->isTargetELF()) {
+ if (TT.isOSBinFormatELF()) {
const TargetLoweringObjectFileELF &TLOFELF =
static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
@@ -562,7 +594,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
for (auto &stub: Stubs) {
OutStreamer.EmitLabel(stub.first);
@@ -612,69 +644,96 @@ void ARMAsmPrinter::emitAttributes() {
MCTargetStreamer &TS = *OutStreamer.getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
- ATS.switchVendor("aeabi");
+ ATS.emitTextAttribute(ARMBuildAttrs::conformance, "2.09");
- std::string CPUString = Subtarget->getCPUString();
+ ATS.switchVendor("aeabi");
- // FIXME: remove krait check when GNU tools support krait cpu
- if (CPUString != "generic" && CPUString != "krait")
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+ // Compute ARM ELF Attributes based on the default subtarget that
+ // we'd have constructed. The existing ARM behavior isn't LTO clean
+ // anyhow.
+ // FIXME: For ifunc related functions we could iterate over and look
+ // for a feature string that doesn't match the default one.
+ StringRef TT = TM.getTargetTriple();
+ StringRef CPU = TM.getTargetCPU();
+ StringRef FS = TM.getTargetFeatureString();
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+ const ARMBaseTargetMachine &ATM =
+ static_cast<const ARMBaseTargetMachine &>(TM);
+ const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());
+
+ std::string CPUString = STI.getCPUString();
+
+ if (CPUString != "generic") {
+ // FIXME: remove krait check when GNU tools support krait cpu
+ if (STI.isKrait()) {
+ ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
+ // We consider krait as a "cortex-a9" + hwdiv CPU
+ // Enable hwdiv through ".arch_extension idiv"
+ if (STI.hasDivide() || STI.hasDivideInARMMode())
+ ATS.emitArchExtension(ARM::HWDIV);
+ } else
+ ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+ }
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch,
- getArchForCPU(CPUString, Subtarget));
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI));
// Tag_CPU_arch_profile must have the default value of 0 when "Architecture
// profile is not applicable (e.g. pre v7, or cross-profile code)".
- if (Subtarget->hasV7Ops()) {
- if (Subtarget->isAClass()) {
+ if (STI.hasV7Ops()) {
+ if (STI.isAClass()) {
ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
ARMBuildAttrs::ApplicationProfile);
- } else if (Subtarget->isRClass()) {
+ } else if (STI.isRClass()) {
ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
ARMBuildAttrs::RealTimeProfile);
- } else if (Subtarget->isMClass()) {
+ } else if (STI.isMClass()) {
ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
ARMBuildAttrs::MicroControllerProfile);
}
}
- ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, Subtarget->hasARMOps() ?
- ARMBuildAttrs::Allowed : ARMBuildAttrs::Not_Allowed);
- if (Subtarget->isThumb1Only()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::Allowed);
- } else if (Subtarget->hasThumb2()) {
+ ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ STI.hasARMOps() ? ARMBuildAttrs::Allowed
+ : ARMBuildAttrs::Not_Allowed);
+ if (STI.isThumb1Only()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
+ } else if (STI.hasThumb2()) {
ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
ARMBuildAttrs::AllowThumb32);
}
- if (Subtarget->hasNEON()) {
+ if (STI.hasNEON()) {
/* NEON is not exactly a VFP architecture, but GAS emit one of
* neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (Subtarget->hasFPARMv8()) {
- if (Subtarget->hasCrypto())
+ if (STI.hasFPARMv8()) {
+ if (STI.hasCrypto())
ATS.emitFPU(ARM::CRYPTO_NEON_FP_ARMV8);
else
ATS.emitFPU(ARM::NEON_FP_ARMV8);
- }
- else if (Subtarget->hasVFP4())
+ } else if (STI.hasVFP4())
ATS.emitFPU(ARM::NEON_VFPV4);
else
ATS.emitFPU(ARM::NEON);
// Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
- if (Subtarget->hasV8Ops())
+ if (STI.hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
ARMBuildAttrs::AllowNeonARMv8);
} else {
- if (Subtarget->hasFPARMv8())
+ if (STI.hasFPARMv8())
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
// FPU, but there are two different names for it depending on the CPU.
- ATS.emitFPU(Subtarget->hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8);
- else if (Subtarget->hasVFP4())
- ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4);
- else if (Subtarget->hasVFP3())
- ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3);
- else if (Subtarget->hasVFP2())
+ ATS.emitFPU(STI.hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8);
+ else if (STI.hasVFP4())
+ ATS.emitFPU(STI.hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4);
+ else if (STI.hasVFP3())
+ ATS.emitFPU(STI.hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3);
+ else if (STI.hasVFP2())
ATS.emitFPU(ARM::VFPV2);
}
@@ -694,11 +753,42 @@ void ARMAsmPrinter::emitAttributes() {
// Signal various FP modes.
if (!TM.Options.UnsafeFPMath) {
- ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed);
- ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
- ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::IEEEDenormals);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed);
+
+ // If the user has permitted this code to choose the IEEE 754
+ // rounding at run-time, emit the rounding attribute.
+ if (TM.Options.HonorSignDependentRoundingFPMathOption)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed);
+ } else {
+ if (!STI.hasVFP2()) {
+ // When the target doesn't have an FPU (by design or
+ // intention), the assumptions made on the software support
+ // mirror that of the equivalent hardware support *if it
+ // existed*. For v7 and better we indicate that denormals are
+ // flushed preserving sign, and for V6 we indicate that
+ // denormals are flushed to positive zero.
+ if (STI.hasV7Ops())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PreserveFPSign);
+ } else if (STI.hasVFP3()) {
+ // In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is,
+ // the sign bit of the zero matches the sign bit of the input or
+ // result that is being flushed to zero.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PreserveFPSign);
+ }
+ // For VFPv2 implementations it is implementation defined as
+ // to whether denormals are flushed to positive zero or to
+ // whatever the sign of zero is (ARM v7AR ARM 2.7.5). Historically
+ // LLVM has chosen to flush this to positive zero (most likely for
+ // GCC compatibility), so that's the chosen value here (the
+ // absence of its emission implies zero).
}
+ // TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath is the
+ // equivalent of GCC's -ffinite-math-only flag.
if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::Allowed);
@@ -706,7 +796,7 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::AllowIEE754);
- if (Subtarget->allowsUnalignedMem())
+ if (STI.allowsUnalignedMem())
ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
ARMBuildAttrs::Allowed);
else
@@ -719,21 +809,28 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1);
// ABI_HardFP_use attribute to indicate single precision FP.
- if (Subtarget->isFPOnlySP())
+ if (STI.isFPOnlySP())
ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
ARMBuildAttrs::HardFPSinglePrecision);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
+ if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
// FIXME: Should we signal R9 usage?
- if (Subtarget->hasFP16())
- ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+ if (STI.hasFP16())
+ ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+ // FIXME: To support emitting this build attribute as GCC does, the
+ // -mfp16-format option and associated plumbing must be
+ // supported. For now the __fp16 type is exposed by default, so this
+ // attribute should be emitted with value 1.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
+ ARMBuildAttrs::FP16FormatIEEE);
- if (Subtarget->hasMPExtension())
- ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+ if (STI.hasMPExtension())
+ ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
// Hardware divide in ARM mode is part of base arch, starting from ARMv8.
// If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
@@ -741,14 +838,14 @@ void ARMAsmPrinter::emitAttributes() {
// arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
// AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
// otherwise, the default value (AllowDIVIfExists) applies.
- if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+ if (STI.hasDivideInARMMode() && !STI.hasV8Ops())
+ ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
if (MMI) {
if (const Module *SourceModule = MMI->getModule()) {
// ABI_PCS_wchar_t to indicate wchar_t width
// FIXME: There is no way to emit value 0 (wchar_t prohibited).
- if (auto WCharWidthValue = cast_or_null<ConstantInt>(
+ if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>(
SourceModule->getModuleFlag("wchar_size"))) {
int WCharWidth = WCharWidthValue->getZExtValue();
assert((WCharWidth == 2 || WCharWidth == 4) &&
@@ -759,7 +856,7 @@ void ARMAsmPrinter::emitAttributes() {
// ABI_enum_size to indicate enum width
// FIXME: There is no way to emit value 0 (enums prohibited) or value 3
// (all enums contain a value needing 32 bits to encode).
- if (auto EnumWidthValue = cast_or_null<ConstantInt>(
+ if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>(
SourceModule->getModuleFlag("min_enum_size"))) {
int EnumWidth = EnumWidthValue->getZExtValue();
assert((EnumWidth == 1 || EnumWidth == 4) &&
@@ -774,22 +871,20 @@ void ARMAsmPrinter::emitAttributes() {
// it as another callee-saved register, but not as SB or a TLS pointer; It
// would instead be nicer to push this from the frontend as metadata, as we do
// for the wchar and enum size tags
- if (Subtarget->isR9Reserved())
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
- ARMBuildAttrs::R9Reserved);
+ if (STI.isR9Reserved())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved);
else
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
- ARMBuildAttrs::R9IsGPR);
-
- if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZVirtualization);
- else if (Subtarget->hasTrustZone())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZ);
- else if (Subtarget->hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowVirtualization);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR);
+
+ if (STI.hasTrustZone() && STI.hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (STI.hasTrustZone())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZ);
+ else if (STI.hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
ATS.finishAttributeSection();
}
@@ -858,9 +953,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
void ARMAsmPrinter::
EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
- int Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(MCPV->getType());
+ const DataLayout *DL = TM.getDataLayout();
+ int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
@@ -1176,7 +1270,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
@@ -1251,18 +1345,34 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case ARM::tBX_CALL: {
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr)
- .addReg(ARM::LR)
- .addReg(ARM::PC)
- // Add predicate operands.
- .addImm(ARMCC::AL)
- .addReg(0));
+ if (Subtarget->hasV5TOps())
+ llvm_unreachable("Expected BLX to be selected for v5t+");
+
+ // On ARM v4t, when doing a call from thumb mode, we need to ensure
+ // that the saved lr has its LSB set correctly (the arch doesn't
+ // have blx).
+ // So here we generate a bl to a small jump pad that does bx rN.
+ // The jump pads are emitted after the function body.
+
+ unsigned TReg = MI->getOperand(0).getReg();
+ MCSymbol *TRegSym = nullptr;
+ for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) {
+ if (ThumbIndirectPads[i].first == TReg) {
+ TRegSym = ThumbIndirectPads[i].second;
+ break;
+ }
+ }
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX)
- .addReg(MI->getOperand(0).getReg())
- // Add predicate operands.
- .addImm(ARMCC::AL)
- .addReg(0));
+ if (!TRegSym) {
+ TRegSym = OutContext.CreateTempSymbol();
+ ThumbIndirectPads.push_back(std::make_pair(TReg, TRegSym));
+ }
+
+ // Create a link-saving branch to the Reg Indirect Jump Pad.
+ EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBL)
+ // Predicate comes first here.
+ .addImm(ARMCC::AL).addReg(0)
+ .addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext)));
return;
}
case ARM::BMOVPCRX_CALL: {
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 5ff20ce..50cb954 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -20,6 +20,7 @@ class ARMFunctionInfo;
class MCOperand;
class MachineConstantPool;
class MachineOperand;
+class MCSymbol;
namespace ARM {
enum DW_ISA {
@@ -45,12 +46,14 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// InConstantPool - Maintain state when emitting a sequence of constant
/// pool entries so we can properly mark them as data regions.
bool InConstantPool;
+
+ /// ThumbIndirectPads - These maintain a per-function list of jump pad
+ /// labels used for ARMv4t thumb code to make register indirect calls.
+ SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads;
+
public:
- explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr),
- InConstantPool(false) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- }
+ explicit ARMAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
const char *getPassName() const override {
return "ARM Assembly / Object Emitter";
@@ -100,12 +103,13 @@ private:
const MachineInstr *MI);
public:
- unsigned getISAEncoding() override {
+ unsigned getISAEncoding(const Function *F) override {
// ARM/Darwin adds ISA to the DWARF info for each function.
- if (!Subtarget->isTargetMachO())
+ Triple TT(TM.getTargetTriple());
+ if (!TT.isOSBinFormatMachO())
return 0;
- return Subtarget->isThumb() ?
- ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
+ const ARMSubtarget &STI = TM.getSubtarget<ARMSubtarget>(*F);
+ return STI.isThumb() ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
private:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7a315c4..29ee22e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1836,8 +1836,10 @@ bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
return false;
}
-MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
- bool PreferFalse) const {
+MachineInstr *
+ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool PreferFalse) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -1885,6 +1887,10 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
+ // Update SeenMIs set: register newly created MI and erase removed DefMI.
+ SeenMIs.insert(NewMI);
+ SeenMIs.erase(DefMI);
+
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
@@ -1985,8 +1991,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, it's only really a great benefit to code-size.
- if (!MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize))
+ if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2394,7 +2399,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
- // For CMPri, we need to check Sub, thus we can't return here.
+ // For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
+ // Thus we cannot return here.
if (CmpInstr->getOpcode() == ARM::CMPri ||
CmpInstr->getOpcode() == ARM::t2CMPri)
MI = nullptr;
@@ -2473,8 +2479,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
case ARM::t2EORrr:
case ARM::t2EORri: {
// Scan forward for the use of CPSR
- // When checking against MI: if it's a conditional code requires
- // checking of V bit, then this is not safe to do.
+ // When checking against MI: if it's a conditional code that requires
+ // checking of the V bit or C bit, then this is not safe to do.
// It is safe to remove CmpInstr if CPSR is redefined or killed.
// If we are done with the basic block, we need to check whether CPSR is
// live-out.
@@ -2541,19 +2547,30 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
OperandsToUpdate.push_back(
std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
- } else
+ } else {
+ // No Sub, so this is x = <op> y, z; cmp x, 0.
switch (CC) {
- default:
+ case ARMCC::EQ: // Z
+ case ARMCC::NE: // Z
+ case ARMCC::MI: // N
+ case ARMCC::PL: // N
+ case ARMCC::AL: // none
// CPSR can be used multiple times, we should continue.
break;
- case ARMCC::VS:
- case ARMCC::VC:
- case ARMCC::GE:
- case ARMCC::LT:
- case ARMCC::GT:
- case ARMCC::LE:
+ case ARMCC::HS: // C
+ case ARMCC::LO: // C
+ case ARMCC::VS: // V
+ case ARMCC::VC: // V
+ case ARMCC::HI: // C Z
+ case ARMCC::LS: // C Z
+ case ARMCC::GE: // N V
+ case ARMCC::LT: // N V
+ case ARMCC::GT: // Z N V
+ case ARMCC::LE: // Z N V
+ // The instruction uses the V bit or C bit which is not safe.
return false;
}
+ }
}
}
@@ -3647,9 +3664,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
+ if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
--Latency;
}
return Latency;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0ae291b..ecbcf5c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -261,7 +261,9 @@ public:
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const override;
- MachineInstr *optimizeSelect(MachineInstr *MI, bool) const override;
+ MachineInstr *optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool) const override;
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 6dc0493..7574727 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -60,9 +60,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_SaveList
- : CSR_AAPCS_SaveList;
+ const MCPhysReg *RegList =
+ STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
if (!MF) return RegList;
@@ -95,8 +94,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
- return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
+ return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
const uint32_t*
@@ -117,8 +115,8 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
return nullptr;
- return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask;
+ return STI.isTargetDarwin() ? CSR_iOS_ThisReturn_RegMask
+ : CSR_AAPCS_ThisReturn_RegMask;
}
BitVector ARMBaseRegisterInfo::
@@ -266,7 +264,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
void
-ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const {
MachineRegisterInfo *MRI = &MF.getRegInfo();
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
@@ -356,10 +354,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return false;
// We may also need a base pointer if there are dynamic allocas or stack
// pointer adjustments around calls.
- if (MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->hasReservedCallFrame(MF))
+ if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF))
return true;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
@@ -370,14 +365,10 @@ bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- bool requiresRealignment =
- ((MFI->getMaxAlignment() > StackAlign) ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment));
+ unsigned StackAlign =
+ MF.getSubtarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttribute(Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
@@ -555,12 +546,13 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// and pick a real one.
Offset += 128; // 128 bytes of spill slots
- // If there is a frame pointer, try using it.
+ // If there's a frame pointer and the addressing mode allows it, try using it.
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
- if (TFI->hasFP(MF) &&
+ if (TFI->hasFP(MF) &&
+ (MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s &&
!((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
if (isFrameOffsetLegal(MI, FPOffset))
return false;
@@ -677,7 +669,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
NumBits = 8;
break;
case ARMII::AddrModeT1_s:
- NumBits = 5;
+ NumBits = 8;
Scale = 4;
isSigned = false;
break;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index e9bc412..17027c2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -135,7 +135,7 @@ public:
const MachineFunction &MF,
const VirtRegMap *VRM) const override;
- void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const override;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index bd07236..d687568 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -31,7 +31,7 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
// Try to get the first register.
- if (unsigned Reg = State.AllocateReg(RegList, 4))
+ if (unsigned Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else {
// For the 2nd half of a v2f64, do not fail.
@@ -46,7 +46,7 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
}
// Try to get the second register.
- if (unsigned Reg = State.AllocateReg(RegList, 4))
+ if (unsigned Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
@@ -76,11 +76,11 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
- unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
if (Reg == 0) {
// If we had R3 unallocated only, now we still must to waste it.
- Reg = State.AllocateReg(GPRArgRegs, 4);
+ Reg = State.AllocateReg(GPRArgRegs);
assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
// For the 2nd half of a v2f64, do not just fail.
@@ -126,7 +126,7 @@ static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
if (Reg == 0)
return false; // we didn't handle it
@@ -160,6 +160,8 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State);
}
+static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
ARM::S4, ARM::S5, ARM::S6, ARM::S7,
ARM::S8, ARM::S9, ARM::S10, ARM::S11,
@@ -168,85 +170,114 @@ static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
// has InConsecutiveRegs set, and that the last member also has
// InConsecutiveRegsLast set. We must process all members of the HA before
// we can allocate it, as we need to know the total number of registers that
// will be needed in order to (attempt to) allocate a contiguous block.
-static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
- SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
+static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
// AAPCS HFAs must have 1-4 elements, all of the same type
- assert(PendingHAMembers.size() < 4);
- if (PendingHAMembers.size() > 0)
- assert(PendingHAMembers[0].getLocVT() == LocVT);
+ if (PendingMembers.size() > 0)
+ assert(PendingMembers[0].getLocVT() == LocVT);
// Add the argument to the list to be allocated once we know the size of the
- // HA
- PendingHAMembers.push_back(
- CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
-
- if (ArgFlags.isInConsecutiveRegsLast()) {
- assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
- "Homogeneous aggregates must have between 1 and 4 members");
-
- // Try to allocate a contiguous block of registers, each of the correct
- // size to hold one member.
- const uint16_t *RegList;
- unsigned NumRegs;
- switch (LocVT.SimpleTy) {
- case MVT::f32:
- RegList = SRegList;
- NumRegs = 16;
- break;
- case MVT::f64:
- RegList = DRegList;
- NumRegs = 8;
- break;
- case MVT::v2f64:
- RegList = QRegList;
- NumRegs = 4;
- break;
- default:
- llvm_unreachable("Unexpected member type for HA");
- break;
- }
+ // aggregate. Store the type's required alignment as extra info for later: in
+ // the [N x i64] case all trace has been removed by the time we actually get
+ // to do allocation.
+ PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
+ ArgFlags.getOrigAlign()));
- unsigned RegResult =
- State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
-
- if (RegResult) {
- for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
- It != PendingHAMembers.end(); ++It) {
- It->convertToReg(RegResult);
- State.addLoc(*It);
- ++RegResult;
- }
- PendingHAMembers.clear();
- return true;
- }
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
- // Register allocation failed, fall back to the stack
+ ArrayRef<uint16_t> RegList;
+ switch (LocVT.SimpleTy) {
+ case MVT::i32: {
+ RegList = RRegList;
+ unsigned RegIdx = State.getFirstUnallocated(RegList);
- // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
- for (unsigned regNo = 0; regNo < 16; ++regNo)
- State.AllocateReg(SRegList[regNo]);
+ // First consume all registers that would give an unaligned object. Whether
+ // we go on stack or in regs, no-one will be using them in future.
+ unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4;
+ while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
+ State.AllocateReg(RegList[RegIdx++]);
- unsigned Size = LocVT.getSizeInBits() / 8;
- unsigned Align = std::min(Size, 8U);
+ break;
+ }
+ case MVT::f32:
+ RegList = SRegList;
+ break;
+ case MVT::f64:
+ RegList = DRegList;
+ break;
+ case MVT::v2f64:
+ RegList = QRegList;
+ break;
+ default:
+ llvm_unreachable("Unexpected member type for block aggregate");
+ break;
+ }
+
+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+ if (RegResult) {
+ for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
+ It != PendingMembers.end(); ++It) {
+ It->convertToReg(RegResult);
+ State.addLoc(*It);
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
+ }
+
+ // Register allocation failed, we'll be needing the stack
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
+ // If nothing else has used the stack until this point, a non-HFA aggregate
+ // can be split between regs and stack.
+ unsigned RegIdx = State.getFirstUnallocated(RegList);
+ for (auto &It : PendingMembers) {
+ if (RegIdx >= RegList.size())
+ It.convertToMem(State.AllocateStack(Size, Size));
+ else
+ It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
- for (auto It : PendingHAMembers) {
- It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
}
+ PendingMembers.clear();
+ return true;
+ } else if (LocVT != MVT::i32)
+ RegList = SRegList;
+
+ // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
- // All pending members have now been allocated
- PendingHAMembers.clear();
+ for (auto &It : PendingMembers) {
+ It.convertToMem(State.AllocateStack(Size, Align));
+ State.addLoc(It);
+
+ // After the first item has been allocated, the rest are packed as tightly
+ // as possible. (E.g. an incoming i64 would have starting Align of 8, but
+ // we'll be allocating a bunch of i32 slots).
+ Align = Size;
}
- // This will be allocated by the last member of the HA
+ // All pending members have now been allocated
+ PendingMembers.clear();
+
+ // This will be allocated by the last member of the aggregate
return true;
}
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 526089b..7dd21ecbe 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -175,7 +175,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// HFAs are passed in a contiguous block of registers, or on the stack
- CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
+ CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 29405eb..9966cd7 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -383,11 +383,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
<< MCP->getConstants().size() << " CP entries, aligned to "
<< MCP->getConstantPoolAlignment() << " bytes *****\n");
- TII = (const ARMBaseInstrInfo *)MF->getTarget()
- .getSubtargetImpl()
- ->getInstrInfo();
+ STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
+ TII = STI->getInstrInfo();
AFI = MF->getInfo<ARMFunctionInfo>();
- STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
@@ -532,7 +530,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getSubtarget().getDataLayout();
+ const DataLayout &TD = *MF->getTarget().getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -1270,7 +1268,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
- BBInfo[UserMBB->getNumber()].Size += Delta;
+ computeBlockSize(UserMBB);
adjustBBOffsetsAfter(UserMBB);
return;
}
@@ -1952,7 +1950,9 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI
<< " lea: " << *LeaMI);
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
- MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
+ MachineBasicBlock::iterator MI_JT = MI;
+ MachineInstr *NewJTMI =
+ BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
.addReg(IdxReg, getKillRegState(IdxRegKill))
.addJumpTableIndex(JTI, JTOP.getTargetFlags())
.addImm(MI->getOperand(JTOpIdx+1).getImm());
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2d80518..4438f50 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -22,8 +22,8 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
@@ -887,6 +887,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
// Emit bic r6, r6, MaxAlign
+ assert(MaxAlign <= 256 && "The BIC instruction cannot encode "
+ "immediates larger than 256 with all lower "
+ "bits set.");
unsigned bicOpc = AFI->isThumbFunction() ?
ARM::t2BICri : ARM::BICri;
AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
@@ -980,7 +983,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
unsigned PICAddOpc =
IsARM
- ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICADD : ARM::PICLDR)
+ ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
: ARM::tPICADD;
// We need a new const-pool entry to load from.
@@ -1129,7 +1132,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add the source operands (D subregs).
unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
- MIB.addReg(D0).addReg(D1);
+ MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
+ .addReg(D1, SrcIsKill ? RegState::Kill : 0);
if (SrcIsKill) // Add an implicit kill for the Q register.
MIB->addRegisterKilled(SrcReg, TRI, true);
@@ -1342,11 +1346,9 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- TII = static_cast<const ARMBaseInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
- STI = &TM.getSubtarget<ARMSubtarget>();
+ STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
bool Modified = false;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index a5f635e..375d394 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -93,11 +93,11 @@ class ARMFastISel final : public FastISel {
explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo),
+ Subtarget(
+ &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
M(const_cast<Module &>(*funcInfo.Fn->getParent())),
- TM(funcInfo.MF->getTarget()),
- TII(*TM.getSubtargetImpl()->getInstrInfo()),
- TLI(*TM.getSubtargetImpl()->getTargetLowering()) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
+ TLI(*Subtarget->getTargetLowering()) {
AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
isThumb2 = AFI->isThumbFunction();
Context = &funcInfo.Fn->getContext();
@@ -189,9 +189,7 @@ class ARMFastISel final : public FastISel {
unsigned ARMSelectCallOp(bool UseReg);
unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
- const TargetLowering *getTargetLowering() {
- return TM.getSubtargetImpl()->getTargetLowering();
- }
+ const TargetLowering *getTargetLowering() { return &TLI; }
// Call handling routines.
private:
@@ -586,9 +584,8 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
Reloc::Model RelocM = TM.getRelocationModel();
bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
- const TargetRegisterClass *RC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRRegClass;
unsigned DestReg = createResultReg(RC);
// FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
@@ -893,9 +890,8 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
- const TargetRegisterClass *RC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1094,9 +1090,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
- unsigned Res = createResultReg(isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
+ unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1500,9 +1495,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
// Now set a register based on the comparison. Explicitly set the predicates
// here.
unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
- const TargetRegisterClass *RC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRRegClass;
unsigned DestReg = createResultReg(RC);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = fastMaterializeConstant(Zero);
@@ -2490,19 +2484,12 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- unsigned LdrOpc;
- const TargetRegisterClass *RC;
- if (isThumb2) {
- LdrOpc = ARM::t2LDRi12;
- RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
- } else {
- LdrOpc = ARM::LDRi12;
- RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
- }
+ unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = FramePtr;
@@ -3075,13 +3062,13 @@ namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
const TargetMachine &TM = funcInfo.MF->getTarget();
-
- const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget());
// Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
bool UseFastISel = false;
- UseFastISel |= Subtarget->isTargetMachO() && !Subtarget->isThumb1Only();
- UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb();
- UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb();
+ UseFastISel |= STI.isTargetMachO() && !STI.isThumb1Only();
+ UseFastISel |= STI.isTargetLinux() && !STI.isThumb();
+ UseFastISel |= STI.isTargetNaCl() && !STI.isThumb();
if (UseFastISel) {
// iOS always has a FP for backtracking, force other targets
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 80add7a..5a5bd57 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -164,9 +164,13 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) {
static bool WindowsRequiresStackProbe(const MachineFunction &MF,
size_t StackSizeInBytes) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (MFI->getStackProtectorIndex() > 0)
- return StackSizeInBytes >= 4080;
- return StackSizeInBytes >= 4096;
+ const Function *F = MF.getFunction();
+ unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096;
+ if (F->hasFnAttribute("stack-probe-size"))
+ F->getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ return StackSizeInBytes >= StackProbeSize;
}
namespace {
@@ -203,12 +207,77 @@ struct StackAdjustingInsts {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, std::next(Info.I), dl,
- TII.get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
};
}
+/// Emit an instruction sequence that will align the address in
+/// register Reg by zero-ing out the lower bits. For versions of the
+/// architecture that support Neon, this must be done in a single
+/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
+/// single instruction. That function only gets called when optimizing
+/// spilling of D registers on a core with the Neon instruction set
+/// present.
+static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
+ const TargetInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, const unsigned Reg,
+ const unsigned Alignment,
+ const bool MustBeSingleInstruction) {
+ const ARMSubtarget &AST =
+ static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
+ const unsigned AlignMask = Alignment - 1;
+ const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
+ if (!AFI->isThumbFunction()) {
+ // if the BFC instruction is available, use that to zero the lower
+ // bits:
+ // bfc Reg, #0, log2(Alignment)
+ // otherwise use BIC, if the mask to zero the required number of bits
+ // can be encoded in the bic immediate field
+ // bic Reg, Reg, Alignment-1
+ // otherwise, emit
+ // lsr Reg, Reg, log2(Alignment)
+ // lsl Reg, Reg, log2(Alignment)
+ if (CanUseBFC) {
+ AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask));
+ } else if (AlignMask <= 255) {
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(AlignMask)));
+ } else {
+ assert(!MustBeSingleInstruction &&
+ "Shouldn't call emitAligningInstructions demanding a single "
+ "instruction to be emitted for large stack alignment for a target "
+ "without BFC.");
+ AddDefaultCC(AddDefaultPred(
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
+ AddDefaultCC(AddDefaultPred(
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
+ }
+ } else {
+ // Since this is only reached for Thumb-2 targets, the BFC instruction
+ // should always be available.
+ assert(CanUseBFC);
+ AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask));
+ }
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -218,15 +287,12 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MCContext &Context = MMI.getContext();
const TargetMachine &TM = MF.getTarget();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
+ const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align =
- TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
+ unsigned Align = STI.getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -451,13 +517,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
nullptr, MRI->getDwarfRegNum(FramePtr, true),
-(ArgRegsSaveSize - FramePtrOffsetInPush)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -491,7 +559,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
}
}
@@ -514,7 +583,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
break;
}
@@ -535,7 +605,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
}
@@ -561,28 +632,24 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// realigned.
if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
unsigned MaxAlign = MFI->getMaxAlignment();
- assert (!AFI->isThumb1OnlyFunction());
+ assert(!AFI->isThumb1OnlyFunction());
if (!AFI->isThumbFunction()) {
- // Emit bic sp, sp, MaxAlign
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
- TII.get(ARM::BICri), ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addImm(MaxAlign-1)));
+ emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
+ false);
} else {
- // We cannot use sp as source/dest register here, thus we're emitting the
- // following sequence:
+ // We cannot use sp as source/dest register here, thus we're using r4 to
+ // perform the calculations. We're emitting the following sequence:
// mov r4, sp
- // bic r4, r4, MaxAlign
+ // -- use emitAligningInstructions to produce best sequence to zero
+ // -- out lower bits in r4
// mov sp, r4
// FIXME: It will be better just to find spare register here.
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
- .addReg(ARM::SP, RegState::Kill));
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
- TII.get(ARM::t2BICri), ARM::R4)
- .addReg(ARM::R4, RegState::Kill)
- .addImm(MaxAlign-1)));
+ .addReg(ARM::SP, RegState::Kill));
+ emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
+ false);
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
- .addReg(ARM::R4, RegState::Kill));
+ .addReg(ARM::R4, RegState::Kill));
}
AFI->setShouldRestoreSPFromFP(true);
@@ -612,11 +679,59 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
+/// Replace a trailing TCRETURNdi / TCRETURNri pseudo in \p MBB with the actual
+/// tail-jump instruction; blocks ending in any other return are left untouched.
+/// The pseudo is erased, so iterators the caller holds to it are invalidated.
+void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
+                                   MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
+  unsigned RetOpcode = MBBI->getOpcode();
+  DebugLoc dl = MBBI->getDebugLoc();
+  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
+
+  if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
+    return;
+
+  // MBBI still designates the TCRETURN pseudo; its first operand is the target.
+  MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+  // Jump to label or value in register.
+  if (RetOpcode == ARM::TCRETURNdi) {
+    unsigned TCOpcode = STI.isThumb() ?
+             (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
+             ARM::TAILJMPd;
+    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+    if (JumpTarget.isGlobal())
+      MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+                           JumpTarget.getTargetFlags());
+    else {
+      assert(JumpTarget.isSymbol());
+      MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+                            JumpTarget.getTargetFlags());
+    }
+
+    // Add the default predicate in Thumb mode.
+    if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
+  } else if (RetOpcode == ARM::TCRETURNri) {
+    BuildMI(MBB, MBBI, dl,
+            TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
+      addReg(JumpTarget.getReg(), RegState::Kill);
+  }
+
+  // The jump now sits just before the pseudo; copy the pseudo's remaining
+  // operands (everything after the jump target) onto it.
+  MachineInstr *NewMI = std::prev(MBBI);
+  for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+    NewMI->addOperand(MBBI->getOperand(i));
+
+  // Delete the pseudo instruction TCRETURN.
+  MBB.erase(MBBI);
+}
+
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -627,18 +742,17 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
+ unsigned Align = STI.getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
+ fixTCReturn(MF, MBB);
return;
+ }
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
@@ -717,42 +831,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
- if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
-
- // Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- unsigned TCOpcode = STI.isThumb() ?
- (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
- ARM::TAILJMPd;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
- if (JumpTarget.isGlobal())
- MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- else {
- assert(JumpTarget.isSymbol());
- MIB.addExternalSymbol(JumpTarget.getSymbolName(),
- JumpTarget.getTargetFlags());
- }
-
- // Add the default predicate in Thumb mode.
- if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
- } else if (RetOpcode == ARM::TCRETURNri) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = std::prev(MBBI);
- for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- MBBI = NewMI;
- }
+ fixTCReturn(MF, MBB);
if (ArgRegsSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
@@ -1062,15 +1141,16 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// The immediate is <= 64, so it doesn't need any special encoding.
unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addReg(ARM::SP)
- .addImm(8 * NumAlignedDPRCS2Regs)));
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)));
- // bic r4, r4, #align-1
- Opc = isThumb ? ARM::t2BICri : ARM::BICri;
unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addReg(ARM::R4, RegState::Kill)
- .addImm(MaxAlign - 1)));
+ // We must set parameter MustBeSingleInstruction to true, since
+ // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
+ // stack alignment. Luckily, this can always be done since all ARM
+ // architecture versions that support Neon also support the BFC
+ // instruction.
+ emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
// mov sp, r4
// The stack pointer must be adjusted before spilling anything, otherwise
@@ -1387,25 +1467,20 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Naked functions don't spill callee-saved registers.
- if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Naked))
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
return;
// We are planning to use NEON instructions vst1 / vld1.
- if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
+ if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
return;
// Don't bother if the default stack alignment is sufficiently high.
- if (MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment() >= 8)
+ if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
return;
// Aligned spills require stack realignment.
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- if (!RegInfo->canRealignStack(MF))
+ if (!static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
return;
// We always spill contiguous d-registers starting from d8. Count how many
@@ -1789,7 +1864,7 @@ static const uint64_t kSplitStackAvailable = 256;
void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
unsigned Opcode;
unsigned CFIIndex;
- const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
bool Thumb = ST->isThumb();
// Sadly, this currently doesn't support varargs, platforms other than
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index a83b773..b7be436 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -31,6 +31,8 @@ public:
void emitPrologue(MachineFunction &MF) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 0e4f81c..a84603b 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -44,10 +44,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
- const TargetMachine &TM =
- MI->getParent()->getParent()->getTarget();
+ const MachineFunction *MF = MI->getParent()->getParent();
const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ MF->getSubtarget().getInstrInfo());
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 6941579..6ebf640 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -70,7 +70,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
- Subtarget = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ Subtarget = &MF.getSubtarget<ARMSubtarget>();
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -992,18 +992,24 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
Addr = N;
unsigned Alignment = 0;
- if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+
+ MemSDNode *MemN = cast<MemSDNode>(Parent);
+
+ if (isa<LSBaseSDNode>(MemN) ||
+ ((MemN->getOpcode() == ARMISD::VST1_UPD ||
+ MemN->getOpcode() == ARMISD::VLD1_UPD) &&
+ MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
// This case occurs only for VLD1-lane/dup and VST1-lane instructions.
// The maximum alignment is equal to the memory size being referenced.
- unsigned LSNAlign = LSN->getAlignment();
- unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
- if (LSNAlign >= MemSize && MemSize > 1)
+ unsigned MMOAlign = MemN->getAlignment();
+ unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
+ if (MMOAlign >= MemSize && MemSize > 1)
Alignment = MemSize;
} else {
// All other uses of addrmode6 are for intrinsics. For now just record
// the raw alignment value; it will be refined later based on the legal
// alignment operands for the intrinsic.
- Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+ Alignment = MemN->getAlignment();
}
Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
@@ -1191,6 +1197,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ // Only multiples of 4 are allowed for the offset, so the frame object
+ // alignment must be at least 4.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
@@ -1208,6 +1219,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ // For LHS+RHS to result in an offset that's a multiple of 4 the object
+ // indexed by the LHS must be 4-byte aligned.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
@@ -1784,6 +1800,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2f64:
case MVT::v2i64: OpcodeIndex = 3;
assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
break;
@@ -1920,6 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2f64:
case MVT::v2i64: OpcodeIndex = 3;
assert(NumVecs == 1 && "v2i64 type only supported for VST1");
break;
@@ -2290,7 +2308,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
// Note: The width operand is encoded as width-1.
- unsigned Width = CountTrailingOnes_32(And_imm) - 1;
+ unsigned Width = countTrailingOnes(And_imm) - 1;
unsigned LSB = Srl_imm;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
@@ -2494,6 +2512,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
if (Subtarget->isThumb1Only()) {
+ // Set the alignment of the frame object to 4, to avoid having to generate
+ // more than one ADD
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
CurDAG->getTargetConstant(0, MVT::i32));
} else {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0d0d81f..56290aa 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -156,11 +156,11 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
-ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
- : TargetLowering(TM) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
- Itins = TM.getSubtargetImpl()->getInstrItineraryData();
+ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
+ const ARMSubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
+ RegInfo = Subtarget->getRegisterInfo();
+ Itins = Subtarget->getInstrItineraryData();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -404,22 +404,20 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
}
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction((MVT::SimpleValueType)VT,
- (MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
- setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
@@ -567,15 +565,18 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
MVT::v4i16, MVT::v2i16,
MVT::v2i32};
for (unsigned i = 0; i < 6; ++i) {
- setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
- setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
- setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal);
+ }
}
}
@@ -617,11 +618,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
}
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// ARM does not have floating-point extending loads.
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ }
// ... or truncating stores
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -629,7 +632,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
// ARM does not have i1 sign extending load.
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// ARM supports all 4 flavors of integer indexed load / store.
if (!Subtarget->isThumb1Only()) {
@@ -963,13 +967,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
-std::pair<const TargetRegisterClass*, uint8_t>
-ARMTargetLowering::findRepresentativeClass(MVT VT) const{
+std::pair<const TargetRegisterClass *, uint8_t>
+ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
- return TargetLowering::findRepresentativeClass(VT);
+ return TargetLowering::findRepresentativeClass(TRI, VT);
// Use DPR as representative register class for all floating point
// and vector types. Since there are 32 SPR registers and 32 DPR registers so
// the cost is 1 for both f32 and f64.
@@ -1166,12 +1171,6 @@ ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
return ARM::createFastISel(funcInfo, libInfo);
}
-/// getMaximalGlobalOffset - Returns the maximal possible offset which can
-/// be used for loads / stores from the global.
-unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
- return (Subtarget->isThumb1Only() ? 127 : 4095);
-}
-
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
@@ -1190,8 +1189,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Load are scheduled for latency even if there instruction itinerary
// is not available.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
@@ -1783,8 +1781,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -1815,9 +1812,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
if (!isTailCall) {
const uint32_t *Mask;
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
Mask = ARI->getThisReturnPreservedMask(CallConv);
@@ -1865,7 +1860,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
void
ARMTargetLowering::HandleByVal(
CCState *State, unsigned &size, unsigned Align) const {
- unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+ unsigned reg = State->AllocateReg(GPRArgRegs);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
@@ -1875,7 +1870,7 @@ ARMTargetLowering::HandleByVal(
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
for (unsigned i = 0; i < Waste; ++i)
- reg = State->AllocateReg(GPRArgRegs, 4);
+ reg = State->AllocateReg(GPRArgRegs);
}
if (reg != 0) {
unsigned excess = 4 * (ARM::R4 - reg);
@@ -1886,7 +1881,7 @@ ARMTargetLowering::HandleByVal(
// remained registers.
const unsigned NSAAOffset = State->getNextStackOffset();
if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
- while (State->AllocateReg(GPRArgRegs, 4))
+ while (State->AllocateReg(GPRArgRegs))
;
return;
}
@@ -1903,7 +1898,7 @@ ARMTargetLowering::HandleByVal(
// Note, first register is allocated in the beginning of function already,
// allocate remained amount of registers we need.
for (unsigned i = reg+1; i != ByValRegEnd; ++i)
- State->AllocateReg(GPRArgRegs, 4);
+ State->AllocateReg(GPRArgRegs);
// A byval parameter that is split between registers and memory needs its
// size truncated here.
// In the case where the entire structure fits in registers, we set the
@@ -2025,7 +2020,9 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- if (GV->hasExternalWeakLinkage())
+ const Triple TT(getTargetMachine().getTargetTriple());
+ if (GV->hasExternalWeakLinkage() &&
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
@@ -2084,8 +2081,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
@@ -2837,16 +2833,11 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
NumGPRs = REnd - RBegin;
} else {
unsigned int firstUnalloced;
- firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
- sizeof(GPRArgRegs) /
- sizeof(GPRArgRegs[0]));
+ firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs);
NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
}
- unsigned Align = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
+ unsigned Align = Subtarget->getFrameLowering()->getStackAlignment();
ArgRegsSize = NumGPRs * 4;
// If parameter is split between stack and GPRs...
@@ -2913,8 +2904,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
firstRegToSaveIndex = RBegin - ARM::R0;
lastRegToSaveIndex = REnd - ARM::R0;
} else {
- firstRegToSaveIndex = CCInfo.getFirstUnallocated
- (GPRArgRegs, array_lengthof(GPRArgRegs));
+ firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs);
lastRegToSaveIndex = 4;
}
@@ -3087,8 +3077,11 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
+ if (Ins[VA.getValNo()].isOrigArg()) {
+ std::advance(CurOrigArg,
+ Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
+ }
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
@@ -3129,9 +3122,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
else if (RegVT == MVT::v2f64)
RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
- RC = AFI->isThumb1OnlyFunction() ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
@@ -3169,7 +3161,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
- int index = ArgLocs[i].getValNo();
+ int index = VA.getValNo();
// Some Ins[] entries become multiple ArgLoc[] entries.
// Process them only once.
@@ -3182,6 +3174,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
+ assert(Ins[index].isOrigArg() &&
+ "Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
@@ -3596,8 +3590,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// inverting the compare condition, swapping 'less' and 'greater') and
// sometimes need to swap the operands to the VSEL (which inverts the
// condition in the sense of firing whenever the previous condition didn't)
- if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
- TrueVal.getValueType() == MVT::f64)) {
+ if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
@@ -3616,8 +3610,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
FPCCToARMCC(CC, CondCode, CondCode2);
// Try to generate VSEL on ARMv8.
- if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
- TrueVal.getValueType() == MVT::f64)) {
+ if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
// We can select VMAXNM/VMINNM from a compare followed by a select with the
// same operands, as follows:
// c = fcmp [ogt, olt, ugt, ult] a, b
@@ -4483,6 +4477,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
+ EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
@@ -4512,8 +4507,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
- Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
- Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
break;
case ISD::SETUO: Invert = true; // Fallthrough
case ISD::SETO:
@@ -4521,8 +4516,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
- Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
- Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
+ Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
+ Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
break;
}
} else {
@@ -4556,8 +4551,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
Opc = ARMISD::VTST;
- Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
- Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
+ Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
+ Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
Invert = !Invert;
}
}
@@ -4583,22 +4578,24 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (SingleOp.getNode()) {
switch (Opc) {
case ARMISD::VCEQ:
- Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGE:
- Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLEZ:
- Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGT:
- Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLTZ:
- Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
default:
- Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
} else {
- Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
+ Result = DAG.getSExtOrTrunc(Result, dl, VT);
+
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
@@ -6497,8 +6494,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
void ARMTargetLowering::
SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -6515,9 +6511,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
- const TargetRegisterClass *TRC = isThumb ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass;
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
@@ -6613,8 +6608,7 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MachineBasicBlock *ARMTargetLowering::
EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -6622,9 +6616,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineFrameInfo *MFI = MF->getFrameInfo();
int FI = MFI->getFunctionContextIndex();
- const TargetRegisterClass *TRC = Subtarget->isThumb() ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
+ const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
+ : &ARM::GPRnopcRegClass;
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
@@ -7129,8 +7122,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
@@ -7156,9 +7148,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat) &&
+ if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
@@ -7172,14 +7162,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Select the correct opcode and register class for unit size load/store
bool IsNeon = UnitSize >= 8;
- TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
- : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
if (IsNeon)
- VecTRC = UnitSize == 16
- ? (const TargetRegisterClass *)&ARM::DPairRegClass
- : UnitSize == 8
- ? (const TargetRegisterClass *)&ARM::DPRRegClass
- : nullptr;
+ VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
+ : UnitSize == 8 ? &ARM::DPRRegClass
+ : nullptr;
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -7364,7 +7351,7 @@ MachineBasicBlock *
ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *MBB) const {
const TargetMachine &TM = getTargetMachine();
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(Subtarget->isTargetWindows() &&
@@ -7429,8 +7416,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
@@ -7627,9 +7613,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
// PC and if destination register is the SP, so restrict register class
- unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
+ unsigned NewRsbDstReg =
+ MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
@@ -7694,8 +7679,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
if (NewOpc) {
- const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
@@ -8059,29 +8043,35 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
else
IsLeftOperandMUL = true;
if (MULOp == SDValue())
- return SDValue();
+ return SDValue();
// Figure out the right opcode.
unsigned Opc = MULOp->getOpcode();
unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
// Figure out the high and low input values to the MLAL node.
- SDValue* HiMul = &MULOp;
SDValue* HiAdd = nullptr;
SDValue* LoMul = nullptr;
SDValue* LowAdd = nullptr;
+ // Ensure that ADDE is from high result of ISD::SMUL_LOHI.
+ if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
+ return SDValue();
+
if (IsLeftOperandMUL)
HiAdd = &AddeOp1;
else
HiAdd = &AddeOp0;
- if (AddcOp0->getOpcode() == Opc) {
+ // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node
+ // whose low result is fed to the ADDC we are checking.
+
+ if (AddcOp0 == MULOp.getValue(0)) {
LoMul = &AddcOp0;
LowAdd = &AddcOp1;
}
- if (AddcOp1->getOpcode() == Opc) {
+ if (AddcOp1 == MULOp.getValue(0)) {
LoMul = &AddcOp1;
LowAdd = &AddcOp0;
}
@@ -8089,9 +8079,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
if (!LoMul)
return SDValue();
- if (LoMul->getNode() != HiMul->getNode())
- return SDValue();
-
// Create the merged node.
SelectionDAG &DAG = DCI.DAG;
@@ -8583,7 +8570,10 @@ static SDValue PerformBFICombine(SDNode *N,
unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
unsigned LSB = countTrailingZeros(~InvMask);
unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
- unsigned Mask = (1 << Width)-1;
+ assert(Width <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
+ unsigned Mask = (1u << Width) - 1;
unsigned Mask2 = N11C->getZExtValue();
if ((Mask & (~Mask2)) == 0)
return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
@@ -8655,147 +8645,6 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-/// PerformSTORECombine - Target-specific dag combine xforms for
-/// ISD::STORE.
-static SDValue PerformSTORECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- StoreSDNode *St = cast<StoreSDNode>(N);
- if (St->isVolatile())
- return SDValue();
-
- // Optimize trunc store (of multiple scalars) to shuffle and store. First,
- // pack all of the elements in one place. Next, store to memory in fewer
- // chunks.
- SDValue StVal = St->getValue();
- EVT VT = StVal.getValueType();
- if (St->isTruncatingStore() && VT.isVector()) {
- SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT StVT = St->getMemoryVT();
- unsigned NumElems = VT.getVectorNumElements();
- assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
- unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
-
- // From, To sizes and ElemCount must be pow of two
- if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
-
- // We are going to use the original vector elt for storing.
- // Accumulated smaller vector elements must be a multiple of the store size.
- if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
-
- unsigned SizeRatio = FromEltSz / ToEltSz;
- assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
-
- // Create a type on which we perform the shuffle.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
- NumElems*SizeRatio);
- assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
- SDLoc DL(St);
- SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; ++i)
- ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
-
- // Can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
- DAG.getUNDEF(WideVec.getValueType()),
- ShuffleVec.data());
- // At this point all of the data is stored at the bottom of the
- // register. We now need to save it to mem.
-
- // Find the largest store unit
- MVT StoreType = MVT::i8;
- for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
- tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
- MVT Tp = (MVT::SimpleValueType)tp;
- if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
- StoreType = Tp;
- }
- // Didn't find a legal store type.
- if (!TLI.isTypeLegal(StoreType))
- return SDValue();
-
- // Bitcast the original vector into a vector of store-size units
- EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
- StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
- assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
- SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
- SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
- TLI.getPointerTy());
- SDValue BasePtr = St->getBasePtr();
-
- // Perform one or more big stores into memory.
- unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
- for (unsigned I = 0; I < E; I++) {
- SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- StoreType, ShuffWide,
- DAG.getIntPtrConstant(I));
- SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
- BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- Increment);
- Chains.push_back(Ch);
- }
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
- }
-
- if (!ISD::isNormalStore(St))
- return SDValue();
-
- // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
- // ARM stores of arguments in the same cache line.
- if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
- StVal.getNode()->hasOneUse()) {
- SelectionDAG &DAG = DCI.DAG;
- bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
- SDLoc DL(St);
- SDValue BasePtr = St->getBasePtr();
- SDValue NewST1 = DAG.getStore(St->getChain(), DL,
- StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
- BasePtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
-
- SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
- DAG.getConstant(4, MVT::i32));
- return DAG.getStore(NewST1.getValue(0), DL,
- StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
- OffsetPtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(),
- std::min(4U, St->getAlignment() / 2));
- }
-
- if (StVal.getValueType() != MVT::i64 ||
- StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return SDValue();
-
- // Bitcast an i64 store extracted from a vector to f64.
- // Otherwise, the i64 value will be legalized to a pair of i32 values.
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(StVal);
- SDValue IntVec = StVal.getOperand(0);
- EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
- IntVec.getValueType().getVectorNumElements());
- SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
- SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- Vec, StVal.getOperand(1));
- dl = SDLoc(N);
- SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
- // Make the DAGCombiner fold the bitcasts.
- DCI.AddToWorklist(Vec.getNode());
- DCI.AddToWorklist(ExtElt.getNode());
- DCI.AddToWorklist(V.getNode());
- return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment(),
- St->getAAInfo());
-}
-
/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
/// are normal, non-volatile loads. If so, it is profitable to bitcast an
/// i64 vector to have f64 elements, since the value can then be loaded
@@ -9016,18 +8865,20 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
DAG.getUNDEF(VT), NewMask.data());
}
-/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
-/// NEON load/store intrinsics to merge base address updates.
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
+/// NEON load/store intrinsics, and generic vector load/stores, to merge
+/// base address updates.
+/// For generic load/stores, the memory type is assumed to be a vector.
+/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
- return SDValue();
-
SelectionDAG &DAG = DCI.DAG;
- bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
- N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
- unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ const bool isStore = N->getOpcode() == ISD::STORE;
+ const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
SDValue Addr = N->getOperand(AddrOpIdx);
+ MemSDNode *MemN = cast<MemSDNode>(N);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
@@ -9043,7 +8894,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
// Find the new opcode for the updating load/store.
- bool isLoad = true;
+ bool isLoadOp = true;
bool isLaneOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
@@ -9066,19 +8917,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
NumVecs = 4; isLaneOp = true; break;
case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
- NumVecs = 1; isLoad = false; break;
+ NumVecs = 1; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
- NumVecs = 2; isLoad = false; break;
+ NumVecs = 2; isLoadOp = false; break;
case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
- NumVecs = 3; isLoad = false; break;
+ NumVecs = 3; isLoadOp = false; break;
case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
- NumVecs = 4; isLoad = false; break;
+ NumVecs = 4; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
- NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
- NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
- NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
}
} else {
isLaneOp = true;
@@ -9087,15 +8938,24 @@ static SDValue CombineBaseUpdate(SDNode *N,
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; isLaneOp = false; break;
+ case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
}
}
// Find the size of memory referenced by the load/store.
EVT VecTy;
- if (isLoad)
+ if (isLoadOp) {
VecTy = N->getValueType(0);
- else
+ } else if (isIntrinsic) {
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ } else {
+ assert(isStore && "Node has to be a load, a store, or an intrinsic!");
+ VecTy = N->getOperand(1).getValueType();
+ }
+
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
NumBytes /= VecTy.getVectorNumElements();
@@ -9112,32 +8972,99 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
}
+ // OK, we found an ADD we can fold into the base update.
+ // Now, create a _UPD node, taking care of not breaking alignment.
+
+ EVT AlignedVecTy = VecTy;
+ unsigned Alignment = MemN->getAlignment();
+
+ // If this is a less-than-standard-aligned load/store, change the type to
+ // match the standard alignment.
+ // The alignment is overlooked when selecting _UPD variants; and it's
+ // easier to introduce bitcasts here than fix that.
+ // There are 3 ways to get to this base-update combine:
+ // - intrinsics: they are assumed to be properly aligned (to the standard
+ // alignment of the memory type), so we don't need to do anything.
+ // - ARMISD::VLDx nodes: they are only generated from the aforementioned
+ // intrinsics, so, likewise, there's nothing to do.
+ // - generic load/store instructions: the alignment is specified as an
+ // explicit operand, rather than implicitly as the standard alignment
+ // of the memory type (like the intrinsics). We need to change the
+ // memory type to match the explicit alignment. That way, we don't
+ // generate non-standard-aligned ARMISD::VLDx nodes.
+ if (isa<LSBaseSDNode>(N)) {
+ if (Alignment == 0)
+ Alignment = 1;
+ if (Alignment < VecTy.getScalarSizeInBits() / 8) {
+ MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+ assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
+ assert(!isLaneOp && "Unexpected generic load/store lane.");
+ unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
+ AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
+ }
+ // Don't set an explicit alignment on regular load/stores that we want
+ // to transform to VLD/VST 1_UPD nodes.
+ // This matches the behavior of regular load/stores, which only get an
+ // explicit alignment if the MMO alignment is larger than the standard
+ // alignment of the memory type.
+ // Intrinsics, however, always get an explicit alignment, set to the
+ // alignment of the MMO.
+ Alignment = 1;
+ }
+
// Create the new updating load/store node.
+ // First, create an SDVTList for the new updating node's results.
EVT Tys[6];
- unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
- Tys[n] = VecTy;
+ Tys[n] = AlignedVecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
+
+ // Then, gather the new node's operands.
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
Ops.push_back(Inc);
- for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
- Ops.push_back(N->getOperand(i));
+
+ if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
+ // Try to match the intrinsic's signature
+ Ops.push_back(StN->getValue());
+ } else {
+ // Loads (and of course intrinsics) match the intrinsics' signature,
+ // so just add all but the alignment operand.
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
+ Ops.push_back(N->getOperand(i));
+ }
+
+ // For all node types, the alignment operand is always the last one.
+ Ops.push_back(DAG.getConstant(Alignment, MVT::i32));
+
+ // If this is a non-standard-aligned STORE, the penultimate operand is the
+ // stored value. Bitcast it to the aligned type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
+ SDValue &StVal = Ops[Ops.size()-2];
+ StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal);
}
- MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
- Ops, MemInt->getMemoryVT(),
- MemInt->getMemOperand());
+ Ops, AlignedVecTy,
+ MemN->getMemOperand());
// Update the uses.
- std::vector<SDValue> NewResults;
- for (unsigned i = 0; i < NumResultVecs; ++i) {
+ SmallVector<SDValue, 5> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i)
NewResults.push_back(SDValue(UpdN.getNode(), i));
+
+ // If this is a non-standard-aligned LOAD, the first result is the loaded
+ // value. Bitcast it to the expected result type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
+ SDValue &LdVal = NewResults[0];
+ LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal);
}
+
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
@@ -9147,6 +9074,14 @@ static SDValue CombineBaseUpdate(SDNode *N,
return SDValue();
}
+static SDValue PerformVLDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ return CombineBaseUpdate(N, DCI);
+}
+
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
@@ -9260,6 +9195,164 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
+static SDValue PerformLOADCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+
+ // If this is a legal vector load, try to combine it into a VLD1_UPD.
+ if (ISD::isNormalLoad(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
+ SDValue StVal = St->getValue();
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDLoc DL(St);
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i)
+ ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (MVT Tp : MVT::integer_valuetypes()) {
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
+
+ if (!ISD::isNormalStore(St))
+ return SDValue();
+
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse()) {
+ SelectionDAG &DAG = DCI.DAG;
+ bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
+ SDLoc DL(St);
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
+ BasePtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() == MVT::i64 &&
+ StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(StVal);
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = SDLoc(N);
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getAAInfo());
+ }
+
+ // If this is a legal vector store, try to combine it into a VST1_UPD.
+ if (ISD::isNormalStore(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
// isConstVecPow2 - Return true if each vector element is a power of 2, all
// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
@@ -9316,16 +9409,18 @@ static SDValue PerformVCVTCombine(SDNode *N,
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
+ NumLanes > 4) {
// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would
- // be lossy.
+ // be lossy. We also can't handle more than 4 lanes, since these instructions
+ // only support v2i32/v4i32 types.
return SDValue();
}
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- unsigned NumLanes = Op.getValueType().getVectorNumElements();
SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
@@ -9848,10 +9943,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
case ISD::INTRINSIC_VOID:
@@ -9871,7 +9967,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
default: break;
}
break;
@@ -9934,10 +10030,8 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
- if ((!IsMemset || ZeroMemset) &&
- Subtarget->hasNEON() &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)) {
+ if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
@@ -10535,7 +10629,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
RCPair
-ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
@@ -10581,7 +10676,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (StringRef("{cc}").equals_lower(Constraint))
return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
@@ -10861,11 +10956,7 @@ bool ARM::isBitFieldInvertedMask(unsigned v) {
// there can be 1's on either or both "outsides", all the "inside"
// bits must be 0's
- unsigned TO = CountTrailingOnes_32(v);
- unsigned LO = CountLeadingOnes_32(v);
- v = (v >> TO) << TO;
- v = (v << LO) >> LO;
- return v == 0;
+ return isShiftedMask_32(~v);
}
/// isFPImmLegal - Returns true if the target can instruction select the
@@ -11114,7 +11205,7 @@ bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// This has so far only been implemented for MachO.
bool ARMTargetLowering::useLoadStackGuardNode() const {
- return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO;
+ return Subtarget->isTargetMachO();
}
bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
@@ -11274,7 +11365,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
return (Members > 0 && Members <= 4);
}
-/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
+/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
+/// passing according to AAPCS rules.
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
if (getEffectiveCallingConv(CallConv, isVarArg) !=
@@ -11283,7 +11376,9 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
HABaseType Base = HA_UNKNOWN;
uint64_t Members = 0;
- bool result = isHomogeneousAggregate(Ty, Base, Members);
- DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump());
- return result;
+ bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
+ DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
+
+ bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
+ return IsHA || IsIntArray;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 89b0c31..ec1407d 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -232,7 +232,8 @@ namespace llvm {
class ARMTargetLowering : public TargetLowering {
public:
- explicit ARMTargetLowering(const TargetMachine &TM);
+ explicit ARMTargetLowering(const TargetMachine &TM,
+ const ARMSubtarget &STI);
unsigned getJumpTableEncoding() const override;
@@ -332,9 +333,10 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -352,10 +354,6 @@ namespace llvm {
/// specified value type.
const TargetRegisterClass *getRegClassFor(MVT VT) const override;
- /// getMaximalGlobalOffset - Returns the maximal possible offset which can
- /// be used for loads / stores from the global.
- unsigned getMaximalGlobalOffset() const override;
-
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Addrspacecasts are always noops.
@@ -414,8 +412,9 @@ namespace llvm {
unsigned &Cost) const override;
protected:
- std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(MVT VT) const override;
+ std::pair<const TargetRegisterClass *, uint8_t>
+ findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const override;
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 17d1ffa..bc617f0 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -93,7 +93,7 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
Reloc::Model RM) const {
MachineFunction &MF = *MI->getParent()->getParent();
- const ARMSubtarget &Subtarget = MF.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
if (!Subtarget.useMovt(MF)) {
if (RM == Reloc::PIC_)
@@ -144,21 +144,20 @@ namespace {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (AFI->getGlobalBaseReg() == 0)
return false;
-
- const ARMTargetMachine *TM =
- static_cast<const ARMTargetMachine *>(&MF.getTarget());
- if (TM->getRelocationModel() != Reloc::PIC_)
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ const TargetMachine &TM = MF.getTarget();
+ if (TM.getRelocationModel() != Reloc::PIC_)
return false;
LLVMContext *Context = &MF.getFunction()->getContext();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- unsigned PCAdj = TM->getSubtarget<ARMSubtarget>().isThumb() ? 4 : 8;
+ unsigned PCAdj = STI.isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
*Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
- unsigned Align =
- TM->getSubtargetImpl()->getDataLayout()->getPrefTypeAlignment(
- Type::getInt32PtrTy(*Context));
+ unsigned Align = TM.getDataLayout()->getPrefTypeAlignment(
+ Type::getInt32PtrTy(*Context));
unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
MachineBasicBlock &FirstMBB = MF.front();
@@ -166,9 +165,8 @@ namespace {
DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
unsigned TempReg =
MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
- unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
- ARM::t2LDRpci : ARM::LDRcp;
- const TargetInstrInfo &TII = *TM->getSubtargetImpl()->getInstrInfo();
+ unsigned Opc = STI.isThumb2() ? ARM::t2LDRpci : ARM::LDRcp;
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
TII.get(Opc), TempReg)
.addConstantPoolIndex(Idx);
@@ -178,15 +176,13 @@ namespace {
// Fix the GOT address by adding pc.
unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
- Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::tPICADD
- : ARM::PICADD;
+ Opc = STI.isThumb2() ? ARM::tPICADD : ARM::PICADD;
MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
.addReg(TempReg)
.addImm(ARMPCLabelIndex);
if (Opc == ARM::PICADD)
AddDefaultPred(MIB);
-
return true;
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 3177114..126c552 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -263,8 +263,6 @@ def IsNotMClass : Predicate<"!Subtarget->isMClass()">,
"!armv*m">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb", "arm-mode">;
-def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
-def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
def IsMachO : Predicate<"Subtarget->isTargetMachO()">;
def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -333,24 +331,6 @@ def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
-def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
-def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
- unsigned Value = -(unsigned)N->getZExtValue();
- return Value && ARM_AM::getSOImmVal(Value) != -1;
- }], imm_neg_XFORM> {
- let ParserMatchClass = so_imm_neg_asmoperand;
-}
-
-// Note: this pattern doesn't require an encoder method and such, as it's
-// only used on aliases (Pat<> and InstAlias<>). The actual encoding
-// is handled by the destination instructions, which use so_imm.
-def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
-def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
- }], imm_not_XFORM> {
- let ParserMatchClass = so_imm_not_asmoperand;
-}
-
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
@@ -530,7 +510,7 @@ def shift_imm : Operand<i32> {
let ParserMatchClass = ShifterImmAsmOperand;
}
-// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm.
+// shifter_operand operands: so_reg_reg, so_reg_imm, and mod_imm.
def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; }
def so_reg_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectRegShifterOperand",
@@ -575,27 +555,43 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm
let MIOperandInfo = (ops GPR, i32imm);
}
-
-// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
-// 8-bit immediate rotated by an arbitrary number of bits.
-def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; }
-def so_imm : Operand<i32>, ImmLeaf<i32, [{
+// mod_imm: match a 32-bit immediate operand, which can be encoded into
+// a 12-bit immediate; an 8-bit integer and a 4-bit rotator (See ARMARM
+// - "Modified Immediate Constants"). Within the MC layer we keep this
+// immediate in its encoded form.
+def ModImmAsmOperand: AsmOperandClass {
+ let Name = "ModImm";
+ let ParserMethod = "parseModImm";
+}
+def mod_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getSOImmVal(Imm) != -1;
}]> {
- let EncoderMethod = "getSOImmOpValue";
- let ParserMatchClass = SOImmAsmOperand;
- let DecoderMethod = "DecodeSOImmOperand";
+ let EncoderMethod = "getModImmOpValue";
+ let PrintMethod = "printModImmOperand";
+ let ParserMatchClass = ModImmAsmOperand;
}
-// Break so_imm's up into two pieces. This handles immediates with up to 16
-// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
-// get the first/second pieces.
-def so_imm2part : PatLeaf<(imm), [{
- return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-}]>;
+// Note: the patterns mod_imm_not and mod_imm_neg do not require an encoder
+// method and such, as they are only used on aliases (Pat<> and InstAlias<>).
+// The actual parsing, encoding, decoding are handled by the destination
+// instructions, which use mod_imm.
-/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
-///
+def ModImmNotAsmOperand : AsmOperandClass { let Name = "ModImmNot"; }
+def mod_imm_not : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+ }], imm_not_XFORM> {
+ let ParserMatchClass = ModImmNotAsmOperand;
+}
+
+def ModImmNegAsmOperand : AsmOperandClass { let Name = "ModImmNeg"; }
+def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return Value && ARM_AM::getSOImmVal(Value) != -1;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ModImmNegAsmOperand;
+}
+
+/// arm_i32imm - True for +V6T2, or when isSOImmTwoPartVal() is true.
def arm_i32imm : PatLeaf<(imm), [{
if (Subtarget->useMovt(*MF))
return true;
@@ -1204,7 +1200,7 @@ include "ARMInstrFormats.td"
// Multiclass helpers...
//
-/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// AsI1_bin_irs - Defines a set of (op r, {mod_imm|r|so_reg}) patterns for a
/// binop that produces a value.
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_bin_irs<bits<4> opcod, string opc,
@@ -1213,9 +1209,9 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ [(set GPR:$Rd, (opnode GPR:$Rn, mod_imm:$imm))]>,
Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
@@ -1286,9 +1282,9 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>,
+ [(set GPR:$Rd, (opnode mod_imm:$imm, GPR:$Rn))]>,
Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
@@ -1356,9 +1352,9 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
InstrItinClass iis, PatFrag opnode,
bit Commutable = 0> {
- def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>,
Sched<[WriteALU, ReadALU]>;
def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
@@ -1389,9 +1385,9 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
InstrItinClass iis, PatFrag opnode,
bit Commutable = 0> {
- def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>,
+ [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn))]>,
Sched<[WriteALU, ReadALU]>;
def rsi : ARMPseudoInst<(outs GPR:$Rd),
@@ -1410,16 +1406,16 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
}
}
-/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// AI1_cmp_irs - Defines a set of (op r, {mod_imm|r|so_reg}) cmp / test
/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, bit Commutable = 0> {
- def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
- [(opnode GPR:$Rn, so_imm:$imm)]>,
+ [(opnode GPR:$Rn, mod_imm:$imm)]>,
Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
@@ -1547,9 +1543,9 @@ let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm, CPSR))]>,
Requires<[IsARM]>,
Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
@@ -1617,9 +1613,9 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
+ [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn, CPSR))]>,
Requires<[IsARM]>,
Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
@@ -1813,7 +1809,7 @@ multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
/// the function. The first operand is the ID# for this instruction, the second
/// is the index into the MachineConstantPool that this is, the third is the
/// size in bytes of this constant pool entry.
-let neverHasSideEffects = 1, isNotDuplicable = 1 in
+let hasSideEffects = 0, isNotDuplicable = 1 in
def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary, []>;
@@ -2057,7 +2053,7 @@ def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let hasSideEffects = 0, isReMaterializable = 1 in
// The 'adr' mnemonic encodes differently if the label is before or after
// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
// know until then which form of the instruction will be used.
@@ -2387,6 +2383,33 @@ def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
let Inst{24-23} = 0b11;
}
+// Hypervisor Call is a system instruction
+let isCall = 1 in {
+def HVC : AInoP< (outs), (ins imm0_65535:$imm), BrFrm, NoItinerary,
+ "hvc", "\t$imm", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<16> imm;
+
+ // Even though HVC isn't predicable, its encoding includes a condition field.
+ // The instruction is undefined if the condition field is 0xf; otherwise it is
+ // unpredictable if it isn't condition AL (0xe).
+ let Inst{31-28} = 0b1110;
+ let Unpredictable{31-28} = 0b1111;
+ let Inst{27-24} = 0b0001;
+ let Inst{23-20} = 0b0100;
+ let Inst{19-8} = imm{15-4};
+ let Inst{7-4} = 0b0111;
+ let Inst{3-0} = imm{3-0};
+}
+}
+
+// Return from exception in Hypervisor mode.
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
+def ERET : ABI<0b0001, (outs), (ins), NoItinerary, "eret", "", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ let Inst{23-0} = 0b011000000000000001101110;
+}
+
//===----------------------------------------------------------------------===//
// Load / Store Instructions.
//
@@ -2404,7 +2427,7 @@ defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+let canFoldAsLoad = 1, mayLoad = 1, hasSideEffects = 0,
isReMaterializable = 1, isCodeGenOnly = 1 in
def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
@@ -2431,7 +2454,7 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
[(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr),
LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>,
@@ -2508,7 +2531,7 @@ multiclass AI2_ldridx<bit isByte, string opc,
}
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
// FIXME: for LDR_PRE_REG etc. the itinerary should be either IIC_iLoad_ru or
// IIC_iLoad_siu depending on whether the offset register is shifted.
defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>;
@@ -2544,7 +2567,7 @@ multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
}
}
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>;
defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
@@ -2577,10 +2600,10 @@ def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
let DecoderMethod = "DecodeAddrMode3Instruction";
}
} // hasExtraDefRegAllocReq = 1
-} // mayLoad = 1, neverHasSideEffects = 1
+} // mayLoad = 1, hasSideEffects = 0
// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT.
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$addr, am2offset_reg:$offset),
IndexModePost, LdFrm, IIC_iLoad_ru,
@@ -2699,7 +2722,7 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
[(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
// Store doubleword
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>,
Requires<[IsARM, HasV5TE]> {
@@ -2772,7 +2795,7 @@ multiclass AI2_stridx<bit isByte, string opc,
}
}
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
// FIXME: for STR_PRE_REG etc. the itinerary should be either IIC_iStore_ru or
// IIC_iStore_siu depending on whether the offset register is shifted.
defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>;
@@ -2864,7 +2887,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
let DecoderMethod = "DecodeAddrMode3Instruction";
}
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr),
IndexModePre, StMiscFrm, IIC_iStore_d_ru,
@@ -2894,7 +2917,7 @@ def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb),
let Inst{3-0} = offset{3-0}; // imm3_0/Rm
let DecoderMethod = "DecodeAddrMode3Instruction";
}
-} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
// STRT, STRBT, and STRHT
@@ -2938,7 +2961,7 @@ def STRBT_POST
: ARMAsmPseudo<"strbt${q} $Rt, $addr",
(ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>;
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
IndexModePost, StFrm, IIC_iStore_ru,
@@ -3103,17 +3126,18 @@ multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
}
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
- IIC_iLoad_mu>;
+ IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
- IIC_iStore_mu>;
+ IIC_iStore_mu>,
+ ComplexDeprecationPredicate<"ARMStore">;
-} // neverHasSideEffects
+} // hasSideEffects
// FIXME: remove when we have a way to mark an MI with these properties.
// FIXME: Should pc be an implicit operand like PICADD, etc?
@@ -3139,7 +3163,7 @@ defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
// Move Instructions.
//
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
"mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
@@ -3153,7 +3177,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
}
// A version for the smaller set of tail call registers.
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
@@ -3197,8 +3221,8 @@ def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
-def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
- "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP,
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, mod_imm:$imm)]>, UnaryDP,
Sched<[WriteALU]> {
bits<4> Rd;
bits<12> imm;
@@ -3408,10 +3432,10 @@ defm RSC : AI1_rsc_irs<0b0111, "rsc",
// assume opposite meanings of the carry flag (i.e., carry == !borrow).
// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
// details.
-def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
- (SUBri GPR:$src, so_imm_neg:$imm)>;
-def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
- (SUBSri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(add GPR:$src, mod_imm_neg:$imm),
+ (SUBri GPR:$src, mod_imm_neg:$imm)>;
+def : ARMPat<(ARMaddc GPR:$src, mod_imm_neg:$imm),
+ (SUBSri GPR:$src, mod_imm_neg:$imm)>;
def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm),
(SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
@@ -3423,8 +3447,8 @@ def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
-def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
- (SBCri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(ARMadde GPR:$src, mod_imm_not:$imm, CPSR),
+ (SBCri GPR:$src, mod_imm_not:$imm)>;
def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR),
(SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>,
Requires<[IsARM, HasV6T2]>;
@@ -3705,9 +3729,9 @@ def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
let Inst{3-0} = shift{3-0};
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
-def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm,
IIC_iMVNi, "mvn", "\t$Rd, $imm",
- [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> {
+ [(set GPR:$Rd, mod_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> {
bits<4> Rd;
bits<12> imm;
let Inst{25} = 1;
@@ -3716,8 +3740,8 @@ def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
let Inst{11-0} = imm;
}
-def : ARMPat<(and GPR:$src, so_imm_not:$imm),
- (BICri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(and GPR:$src, mod_imm_not:$imm),
+ (BICri GPR:$src, mod_imm_not:$imm)>;
//===----------------------------------------------------------------------===//
// Multiply Instructions.
@@ -3811,7 +3835,7 @@ def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
}
// Extra precision multiplies with low / high results
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCommutable = 1 in {
def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
@@ -3878,7 +3902,7 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
Requires<[IsARM, NoV6]>;
}
-} // neverHasSideEffects
+} // hasSideEffects
// Most significant word multiply
def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
@@ -4242,8 +4266,8 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
// ARMcmpZ can re-use the above instruction definitions.
-def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
- (CMPri GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, mod_imm:$imm),
+ (CMPri GPR:$src, mod_imm:$imm)>;
def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
(CMPrr GPR:$src, GPR:$rhs)>;
def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs),
@@ -4253,9 +4277,9 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
// CMN register-integer
let isCompare = 1, Defs = [CPSR] in {
-def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
+def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi,
"cmn", "\t$Rn, $imm",
- [(ARMcmn GPR:$Rn, so_imm:$imm)]>,
+ [(ARMcmn GPR:$Rn, mod_imm:$imm)]>,
Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
@@ -4328,11 +4352,11 @@ def CMNzrsr : AI1<0b1011, (outs),
}
-def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
- (CMNri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(ARMcmp GPR:$src, mod_imm_neg:$imm),
+ (CMNri GPR:$src, mod_imm_neg:$imm)>;
-def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
- (CMNri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm),
+ (CMNri GPR:$src, mod_imm_neg:$imm)>;
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
@@ -4359,7 +4383,7 @@ def BCCZi64 : PseudoInst<(outs),
// Conditional moves
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCommutable = 1, isSelect = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd),
@@ -4396,9 +4420,9 @@ def MOVCCi16
let isMoveImm = 1 in
def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, cmovpred:$p),
+ (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm,
cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
@@ -4414,13 +4438,13 @@ def MOVCCi32imm
let isMoveImm = 1 in
def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, cmovpred:$p),
+ (ins GPR:$false, mod_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm,
cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
@@ -5074,7 +5098,7 @@ def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked),
let Inst{23} = 0;
let Inst{22} = banked{5}; // R bit
- let Inst{21-20} = 0b10;
+ let Inst{21-20} = 0b00;
let Inst{19-16} = banked{3-0};
let Inst{15-12} = Rd;
let Inst{11-9} = 0b001;
@@ -5103,17 +5127,17 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
let Inst{3-0} = Rn;
}
-def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
- "msr", "\t$mask, $a", []> {
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, mod_imm:$imm), NoItinerary,
+ "msr", "\t$mask, $imm", []> {
bits<5> mask;
- bits<12> a;
+ bits<12> imm;
let Inst{23} = 0;
let Inst{22} = mask{4}; // R bit
let Inst{21-20} = 0b10;
let Inst{19-16} = mask{3-0};
let Inst{15-12} = 0b1111;
- let Inst{11-0} = a;
+ let Inst{11-0} = imm;
}
// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a
@@ -5204,7 +5228,7 @@ let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
NoItinerary,
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsARM, IsIOS]>;
+ Requires<[IsARM]>;
}
// eh.sjlj.dispatchsetup pseudo-instruction.
@@ -5228,7 +5252,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
// Large immediate handling.
-// 32-bit immediate using two piece so_imms or movw + movt.
+// 32-bit immediate using two piece mod_imms or movw + movt.
// This is a single pseudo instruction, the benefit is that it can be remat'd
// as a single unit instead of having to handle reg inputs.
// FIXME: Remove this when we can do generalized remat.
@@ -5257,6 +5281,7 @@ def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
(ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsARM, DontUseMovt]>;
+let AddedComplexity = 10 in
def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
NoItinerary,
[(set GPR:$dst,
@@ -5519,36 +5544,36 @@ def : MnemonicAlias<"uqsubaddx", "uqsax">;
// USAX == USUBADDX
def : MnemonicAlias<"usubaddx", "usax">;
-// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like
+// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like
// for isel.
def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
- (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+ (MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
- (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+ (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
// Same for AND <--> BIC
def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
- (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
- (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
- (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
- (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-// Likewise, "add Rd, so_imm_neg" -> sub
+// Likewise, "add Rd, mod_imm_neg" -> sub
def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
- (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+ (SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
def : ARMInstAlias<"add${s}${p} $Rd, $imm",
- (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
-// Same for CMP <--> CMN via so_imm_neg
+ (SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via mod_imm_neg
def : ARMInstAlias<"cmp${p} $Rd, $imm",
- (CMNri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+ (CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
def : ARMInstAlias<"cmn${p} $Rd, $imm",
- (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+ (CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
// LSR, ROR, and RRX instructions.
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index a0c627c..2a7b4b5 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -665,7 +665,7 @@ class VLDQQQQWBPseudo<InstrItinClass itin>
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
"$addr.addr = $wb, $src = $dst">;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
@@ -1023,7 +1023,7 @@ def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -1106,7 +1106,7 @@ def : Pat<(vector_insert (v4f32 QPR:$src),
(f32 (load addrmode6:$addr)), imm:$lane),
(VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1359,7 +1359,7 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
@@ -1405,7 +1405,7 @@ def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPq32 addrmode6:$addr)>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
@@ -1609,9 +1609,9 @@ def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -2025,7 +2025,7 @@ def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
-} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -2129,7 +2129,7 @@ def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2351,7 +2351,7 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
-} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
// Use vld1/vst1 for unaligned f64 load / store
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index a867844..3c62e0e 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -714,7 +714,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
//
// These require base address to be written back or one of the loaded regs.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
@@ -754,7 +754,7 @@ def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
let Inst{7-0} = regs;
}
-} // neverHasSideEffects
+} // hasSideEffects
def : InstAlias<"ldm${p} $Rn!, $regs",
(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>,
@@ -888,7 +888,7 @@ def tADDrr : // A8.6.6 T1
"add", "\t$Rd, $Rn, $Rm",
[(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
@@ -1048,7 +1048,7 @@ def : tInstAlias <"movs $Rdn, $imm",
// A7-73: MOV(2) - mov setting flag.
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
2, IIC_iMOVr,
"mov", "\t$Rd, $Rm", "", []>,
@@ -1070,7 +1070,7 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
let Inst{5-3} = Rm;
let Inst{2-0} = Rd;
}
-} // neverHasSideEffects
+} // hasSideEffects
// Multiply register
let isCommutable = 1 in
@@ -1248,7 +1248,7 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
let DecoderMethod = "DecodeThumbAddSpecialReg";
}
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let hasSideEffects = 0, isReMaterializable = 1 in
def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
2, IIC_iALUi, []>, Sched<[WriteALU]>;
@@ -1297,7 +1297,7 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsThumb, IsIOS]>;
+ Requires<[IsThumb]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1375,6 +1375,17 @@ def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
(tLDRBi t_addrmode_is1:$addr)>;
+// extload from the stack -> word load from the stack, as it avoids having to
+// materialize the base in a separate register. This only works when a word
+// load puts the byte/halfword value in the same place in the register that the
+// byte/halfword load would, i.e. when little-endian.
+def : T1Pat<(extloadi1 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+def : T1Pat<(extloadi8 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+
// extload -> zextload
def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 807c252..10b0a0e 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1185,7 +1185,8 @@ class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
class T2I_exta_rrot_np<bits<3> opcod, string opc>
: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
- IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []> {
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
bits<2> rot;
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -1241,7 +1242,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
let DecoderMethod = "DecodeT2Adr";
}
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let hasSideEffects = 0, isReMaterializable = 1 in
def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
let hasSideEffects = 1 in
@@ -1272,12 +1273,12 @@ defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
@@ -1326,7 +1327,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
// Indexed loads
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
@@ -1378,7 +1379,7 @@ def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
"ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
-} // mayLoad = 1, neverHasSideEffects = 1
+} // mayLoad = 1, hasSideEffects = 0
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
@@ -1443,14 +1444,14 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
// Indexed stores
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
(ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
@@ -1468,7 +1469,7 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
"strb", "\t$Rt, $addr!",
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
-} // mayStore = 1, neverHasSideEffects = 1
+} // mayStore = 1, hasSideEffects = 0
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
(ins GPRnopc:$Rt, addr_offset_none:$Rn,
@@ -1763,7 +1764,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
}
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
@@ -1848,14 +1849,14 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin,
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
// Move Instructions.
//
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
"mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
@@ -2572,7 +2573,7 @@ def t2MLS: T2FourReg<
}
// Extra precision multiplies with low / high results
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCommutable = 1 in {
def t2SMULL : T2MulLong<0b000, 0b0000,
(outs rGPR:$RdLo, rGPR:$RdHi),
@@ -2603,7 +2604,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsThumb2, HasThumb2DSP]>;
-} // neverHasSideEffects
+} // hasSideEffects
// Rounding variants of the below included for disassembly only
@@ -3150,7 +3151,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let isCommutable = 1, isSelect = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
@@ -3213,7 +3214,7 @@ def t2MOVCCi32imm
RegConstraint<"$false = $dst">;
} // isCodeGenOnly = 1
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
@@ -3824,6 +3825,27 @@ def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary,
let Inst{7-0} = imm;
}
+// Hypervisor Call is a system instruction.
+let isCall = 1 in {
+def t2HVC : T2XI <(outs), (ins imm0_65535:$imm16), IIC_Br, "hvc.w\t$imm16", []>,
+ Requires<[IsThumb2, HasVirtualization]>, Sched<[WriteBr]> {
+ bits<16> imm16;
+ let Inst{31-20} = 0b111101111110;
+ let Inst{19-16} = imm16{15-12};
+ let Inst{15-12} = 0b1000;
+ let Inst{11-0} = imm16{11-0};
+}
+}
+
+// Alias for HVC without the ".w" optional width specifier
+def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>;
+
+// ERET - Return from exception in Hypervisor mode.
+// B9.3.3, B9.3.20: ERET is an alias for "SUBS PC, LR, #0" in an implementation that
+// includes virtualization extensions.
+def t2ERET : InstAlias<"eret${p}", (t2SUBS_PC_LR 0, pred:$p)>,
+ Requires<[IsThumb2, HasVirtualization]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -4564,17 +4586,21 @@ def : t2InstAlias<"strh${p} $Rt, $addr",
(t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
// Extend instruction optional rotate operand.
-def : t2InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
- (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
- (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
- (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"sxtb16${p} $Rd, $Rm",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"sxtb${p} $Rd, $Rm",
(t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"sxtb16${p} $Rd, $Rm",
- (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
def : t2InstAlias<"sxth${p} $Rd, $Rm",
(t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
def : t2InstAlias<"sxtb${p}.w $Rd, $Rm",
@@ -4582,19 +4608,23 @@ def : t2InstAlias<"sxtb${p}.w $Rd, $Rm",
def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
(t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
- (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
- (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
- (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"uxtb16${p} $Rd, $Rm",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
def : t2InstAlias<"uxtb${p} $Rd, $Rm",
(t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"uxtb16${p} $Rd, $Rm",
- (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
def : t2InstAlias<"uxth${p} $Rd, $Rm",
(t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
-
def : t2InstAlias<"uxtb${p}.w $Rd, $Rm",
(t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
@@ -4603,15 +4633,17 @@ def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
// Extend instruction w/o the ".w" optional width specifier.
def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
(t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
-def : t2InstAlias<"uxtb16${p} $Rd, $Rm$rot",
- (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : InstAlias<"uxtb16${p} $Rd, $Rm$rot",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
(t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
(t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
-def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot",
- (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : InstAlias<"sxtb16${p} $Rd, $Rm$rot",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
(t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index d78f2ac..e0a9314 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -194,7 +194,7 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
}
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
@@ -202,7 +202,7 @@ defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>;
-} // neverHasSideEffects
+} // hasSideEffects
def : MnemonicAlias<"vldm", "vldmia">;
def : MnemonicAlias<"vstm", "vstmia">;
@@ -769,7 +769,7 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
[(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
@@ -777,7 +777,7 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
// FP <-> GPR Copies. Int <-> FP Conversions.
@@ -827,7 +827,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
let D = VFPNeonDomain;
}
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
@@ -876,7 +876,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
let D = VFPNeonDomain;
let DecoderMethod = "DecodeVMOVRRS";
}
-} // neverHasSideEffects
+} // hasSideEffects
// FMDHR: GPR -> SPR
// FMDLR: GPR -> SPR
@@ -907,7 +907,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
let isRegSequence = 1;
}
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
@@ -1543,7 +1543,7 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
// FP Conditional moves.
//
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
IIC_fpUNA64,
[(set (f64 DPR:$Dd),
@@ -1555,7 +1555,7 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
[(set (f32 SPR:$Sd),
(ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
-} // neverHasSideEffects
+} // hasSideEffects
//===----------------------------------------------------------------------===//
// Move from VFP System Register to ARM core register.
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c429ac1..a8d0981 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -170,7 +170,8 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
return OffField;
// Thumb1 immediate offsets are scaled by 4
- if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
+ Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
return OffField * 4;
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
@@ -206,6 +207,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ib: return ARM::STMIB;
}
case ARM::tLDRi:
+ case ARM::tLDRspi:
// tLDMIA is writeback-only - unless the base register is in the input
// reglist.
++NumLDMGened;
@@ -214,6 +216,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::tLDMIA;
}
case ARM::tSTRi:
+ case ARM::tSTRspi:
// There is no non-writeback tSTMIA either.
++NumSTMGened;
switch (Mode) {
@@ -328,7 +331,7 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
- return Opc == ARM::tLDRi;
+ return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}
static bool isT2i32Load(unsigned Opc) {
@@ -340,7 +343,7 @@ static bool isi32Load(unsigned Opc) {
}
static bool isT1i32Store(unsigned Opc) {
- return Opc == ARM::tSTRi;
+ return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}
static bool isT2i32Store(unsigned Opc) {
@@ -356,6 +359,8 @@ static unsigned getImmScale(unsigned Opc) {
default: llvm_unreachable("Unhandled opcode!");
case ARM::tLDRi:
case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
return 1;
case ARM::tLDRHi:
case ARM::tSTRHi:
@@ -495,6 +500,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (isThumb1)
for (unsigned I = 0; I < NumRegs; ++I)
if (Base == Regs[I].first) {
+ assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
if (Opcode == ARM::tLDRi) {
Writeback = false;
break;
@@ -515,7 +521,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
} else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
// VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- } else if (Offset != 0) {
+ } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
// Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
@@ -545,6 +551,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
int BaseOpc =
isThumb2 ? ARM::t2ADDri :
+ (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
(isThumb1 && Offset < 8) ? ARM::tADDi3 :
isThumb1 ? ARM::tADDi8 : ARM::ADDri;
@@ -552,7 +559,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
Offset = - Offset;
BaseOpc =
isThumb2 ? ARM::t2SUBri :
- (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
+ (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
}
@@ -566,18 +573,34 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// or
// MOV NewBase, Base
// ADDS NewBase, #imm8.
- if (Base != NewBase && Offset >= 8) {
+ if (Base != NewBase &&
+ (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
// Need to insert a MOV to the new base first.
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(Pred).addReg(PredReg);
+ if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
+ !STI->hasV6Ops()) {
+ // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
+ if (Pred != ARMCC::AL)
+ return false;
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
+ .addReg(Base, getKillRegState(BaseKill));
+ } else
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+
// Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
Base = NewBase;
BaseKill = false;
}
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
- .addImm(Pred).addReg(PredReg);
+ if (BaseOpc == ARM::tADDrSPi) {
+ assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
+ .addImm(Pred).addReg(PredReg);
+ } else
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
} else {
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
@@ -958,6 +981,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -1393,6 +1418,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -1787,12 +1814,11 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- const TargetMachine &TM = Fn.getTarget();
- TL = TM.getSubtargetImpl()->getTargetLowering();
+ STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ TL = STI->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = TM.getSubtargetImpl()->getInstrInfo();
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
- STI = &TM.getSubtarget<ARMSubtarget>();
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
@@ -1802,7 +1828,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
++MFI) {
MachineBasicBlock &MBB = *MFI;
Modified |= LoadStoreMultipleOpti(MBB);
- if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+ if (STI->hasV5TOps())
Modified |= MergeReturnIntoLDM(MBB);
}
@@ -1850,10 +1876,10 @@ namespace {
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getSubtarget().getDataLayout();
- TII = Fn.getSubtarget().getInstrInfo();
- TRI = Fn.getSubtarget().getRegisterInfo();
+ TD = Fn.getTarget().getDataLayout();
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
MF = &Fn;
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 023f5f8..fd4f5ff 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -119,11 +119,45 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
+ // In the MC layer, we keep modified immediates in their encoded form
+ bool EncodeImms = false;
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::MOVi:
+ case ARM::MVNi:
+ case ARM::CMPri:
+ case ARM::CMNri:
+ case ARM::TSTri:
+ case ARM::TEQri:
+ case ARM::MSRi:
+ case ARM::ADCri:
+ case ARM::ADDri:
+ case ARM::ADDSri:
+ case ARM::SBCri:
+ case ARM::SUBri:
+ case ARM::SUBSri:
+ case ARM::ANDri:
+ case ARM::ORRri:
+ case ARM::EORri:
+ case ARM::BICri:
+ case ARM::RSBri:
+ case ARM::RSBSri:
+ case ARM::RSCri:
+ EncodeImms = true;
+ break;
+ }
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- if (AP.lowerOperand(MO, MCOp))
+ if (AP.lowerOperand(MO, MCOp)) {
+ if (MCOp.isImm() && EncodeImms) {
+ int32_t Enc = ARM_AM::getSOImmVal(MCOp.getImm());
+ if (Enc != -1)
+ MCOp.setImm(Enc);
+ }
OutMI.addOperand(MCOp);
+ }
}
}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 892b269..229d041 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -14,8 +14,8 @@ using namespace llvm;
void ARMFunctionInfo::anchor() { }
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
- : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
- hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ : isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false),
RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 4e67fa1..ddfdb52 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -16,10 +16,10 @@
#include "ARMSubtarget.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
namespace llvm {
diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 2a49255..1c50f9e 100644
--- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -9,8 +9,8 @@
//===------------------------------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b290e7f..45cc9ea 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -199,7 +199,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
// Thumb1 instructions that know how to use hi regs.
let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
let AltOrderSelect = [{
- return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -209,7 +209,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
let AltOrderSelect = [{
- return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -219,7 +219,7 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)> {
let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
let AltOrderSelect = [{
- return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -237,7 +237,7 @@ def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>;
def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
let AltOrderSelect = [{
- return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -255,7 +255,7 @@ def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> {
let AltOrders = [(and tcGPR, tGPR)];
let AltOrderSelect = [{
- return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index fa30ac3..636205f 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -32,7 +32,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
- const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
// Do repeated 4-byte loads and stores. To be improved.
// This requires 4-byte alignment.
if ((Align & 3) != 0)
@@ -150,14 +151,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
- const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
// Use default for non-AAPCS (or MachO) subtargets
if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() ||
Subtarget.isTargetWindows())
return SDValue();
- const ARMTargetLowering &TLI =
- *DAG.getTarget().getSubtarget<ARMSubtarget>().getTargetLowering();
+ const ARMTargetLowering &TLI = *Subtarget.getTargetLowering();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 600f39d..89624dd 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -15,12 +15,14 @@
#include "ARMFrameLowering.h"
#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
-#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -28,7 +30,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@@ -87,56 +88,6 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
"Allow IT blocks based on ARMv7"),
clEnumValEnd));
-static std::string computeDataLayout(ARMSubtarget &ST) {
- std::string Ret = "";
-
- if (ST.isLittle())
- // Little endian.
- Ret += "e";
- else
- // Big endian.
- Ret += "E";
-
- Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
-
- // Pointers are 32 bits and aligned to 32 bits.
- Ret += "-p:32:32";
-
- // ABIs other than APCS have 64 bit integers with natural alignment.
- if (!ST.isAPCS_ABI())
- Ret += "-i64:64";
-
- // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
- // bits, others to 64 bits. We always try to align to 64 bits.
- if (ST.isAPCS_ABI())
- Ret += "-f64:32:64";
-
- // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
- // to 64. We always ty to give them natural alignment.
- if (ST.isAPCS_ABI())
- Ret += "-v64:32:64-v128:32:128";
- else
- Ret += "-v128:64:128";
-
- // Try to align aggregates to 32 bits (the default is 64 bits, which has no
- // particular hardware support on 32-bit ARM).
- Ret += "-a:0:32";
-
- // Integer registers are 32 bits.
- Ret += "-n32";
-
- // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
- // aligned everywhere else.
- if (ST.isTargetNaCl())
- Ret += "-S128";
- else if (ST.isAAPCS_ABI())
- Ret += "-S64";
- else
- Ret += "-S32";
-
- return Ret;
-}
-
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -146,23 +97,31 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
+/// Build the frame-lowering object for this subtarget.
+///
+/// Runs initializeSubtargetDependencies(CPU, FS) first, so the Thumb1-only
+/// query below is made on an initialized subtarget. Returns a
+/// Thumb1FrameLowering for Thumb1-only subtargets and an ARMFrameLowering
+/// otherwise. The object is allocated with new; the ARMSubtarget constructor
+/// passes the result to its FrameLowering member.
+ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
+                                                        StringRef FS) {
+  ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
+  if (STI.isThumb1Only())
+    // Named cast instead of a C-style cast: this only compiles for a genuine
+    // pointer conversion and can never degrade into a reinterpret_cast.
+    return static_cast<ARMFrameLowering *>(new Thumb1FrameLowering(STI));
+
+  return new ARMFrameLowering(STI);
+}
+
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM,
- bool IsLittle)
+ const std::string &FS,
+ const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
- TargetTriple(TT), Options(TM.Options), TargetABI(ARM_ABI_UNKNOWN),
- DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
- TSInfo(DL),
+ TargetTriple(TT), Options(TM.Options), TM(TM),
+ TSInfo(*TM.getDataLayout()),
+ FrameLowering(initializeFrameLowering(CPU, FS)),
+ // At this point initializeSubtargetDependencies has been called so
+ // we can query directly.
InstrInfo(isThumb1Only()
? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
: !isThumb()
? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
- TLInfo(TM),
- FrameLowering(!isThumb1Only()
- ? new ARMFrameLowering(*this)
- : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {}
+ TLInfo(TM, *this) {}
void ARMSubtarget::initializeEnvironment() {
HasV4TOps = false;
@@ -216,7 +175,7 @@ void ARMSubtarget::initializeEnvironment() {
void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUString.empty()) {
- if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s"))
+ if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s"))
// Default to the Swift CPU when targeting armv7s/thumbv7s.
CPUString = "swift";
else
@@ -226,8 +185,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
// based on the architecture version.
- std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(),
- CPUString);
+ std::string ArchFS =
+ ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
ArchFS = ArchFS + "," + FS.str();
@@ -246,30 +205,9 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- if (TargetABI == ARM_ABI_UNKNOWN) {
- switch (TargetTriple.getEnvironment()) {
- case Triple::Android:
- case Triple::EABI:
- case Triple::EABIHF:
- case Triple::GNUEABI:
- case Triple::GNUEABIHF:
- TargetABI = ARM_ABI_AAPCS;
- break;
- default:
- if (TargetTriple.isOSBinFormatMachO() &&
- TargetTriple.getOS() == Triple::UnknownOS)
- TargetABI = ARM_ABI_AAPCS;
- else
- TargetABI = ARM_ABI_APCS;
- break;
- }
- }
-
// FIXME: this is invalid for WindowsCE
- if (isTargetWindows()) {
- TargetABI = ARM_ABI_AAPCS;
+ if (isTargetWindows())
NoARM = true;
- }
if (isAAPCS_ABI())
stackAlignment = 8;
@@ -331,6 +269,15 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
UseNEONForSinglePrecisionFP = true;
}
+/// Report whether the owning target machine's TargetABI is APCS.
+/// Asserts that the ABI is not ARM_ABI_UNKNOWN.
+bool ARMSubtarget::isAPCS_ABI() const {
+  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+  const bool UsesAPCS = TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS;
+  return UsesAPCS;
+}
+/// Report whether the owning target machine's TargetABI is AAPCS.
+/// Asserts that the ABI is not ARM_ABI_UNKNOWN.
+bool ARMSubtarget::isAAPCS_ABI() const {
+  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+  const bool UsesAAPCS = TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS;
+  return UsesAAPCS;
+}
+
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
bool
ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
@@ -402,6 +349,5 @@ bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// immediates as it is inherently position independent, and may be out of
// range otherwise.
return UseMovt && (isTargetWindows() ||
- !MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize));
+ !MF.getFunction()->hasFnAttribute(Attribute::MinSize));
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index d5ee009..f4deddf 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -20,10 +20,10 @@
#include "ARMInstrInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
@@ -37,6 +37,7 @@ namespace llvm {
class GlobalValue;
class StringRef;
class TargetOptions;
+class ARMBaseTargetMachine;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
@@ -228,18 +229,14 @@ protected:
/// Options passed via command line that could influence the target
const TargetOptions &Options;
- public:
- enum {
- ARM_ABI_UNKNOWN,
- ARM_ABI_APCS,
- ARM_ABI_AAPCS // ARM EABI
- } TargetABI;
+ const ARMBaseTargetMachine &TM;
+public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM, bool IsLittle);
+ const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -254,7 +251,6 @@ protected:
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- const DataLayout *getDataLayout() const override { return &DL; }
const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
@@ -272,16 +268,17 @@ protected:
}
private:
- const DataLayout DL;
ARMSelectionDAGInfo TSInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ std::unique_ptr<ARMFrameLowering> FrameLowering;
// Either Thumb1InstrInfo or Thumb2InstrInfo.
std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
ARMTargetLowering TLInfo;
- // Either Thumb1FrameLowering or ARMFrameLowering.
- std::unique_ptr<ARMFrameLowering> FrameLowering;
void initializeEnvironment();
void initSubtargetFeatures(StringRef CPU, StringRef FS);
+ ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS);
+
public:
void computeIssueWidth();
@@ -316,7 +313,8 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
- return hasNEON() && UseNEONForSinglePrecisionFP; }
+ return hasNEON() && UseNEONForSinglePrecisionFP;
+ }
bool hasDivide() const { return HasHardwareDivide; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
@@ -350,7 +348,7 @@ public:
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
- bool isTargetNetBSD() const { return TargetTriple.getOS() == Triple::NetBSD; }
+ bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
@@ -391,14 +389,8 @@ public:
return TargetTriple.getEnvironment() == Triple::Android;
}
- bool isAPCS_ABI() const {
- assert(TargetABI != ARM_ABI_UNKNOWN);
- return TargetABI == ARM_ABI_APCS;
- }
- bool isAAPCS_ABI() const {
- assert(TargetABI != ARM_ABI_UNKNOWN);
- return TargetABI == ARM_ABI_AAPCS;
- }
+ bool isAPCS_ABI() const;
+ bool isAAPCS_ABI() const;
bool isThumb() const { return InThumbMode; }
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 88d6c5e..a97a058 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,13 +11,14 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMTargetMachine.h"
#include "ARMFrameLowering.h"
+#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
+#include "ARMTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -52,6 +53,110 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<ARMElfTargetObjectFile>();
}
+/// Choose the ARM procedure-call ABI for a target machine.
+///
+/// An ABI name in Options.MCOptions takes priority: a name starting with
+/// "aapcs" selects AAPCS and a name starting with "apcs" selects APCS. When
+/// no ABI name is given the choice is derived from the triple \p TT and, for
+/// MachO object files, from \p CPU as well.
+static ARMBaseTargetMachine::ARMABI
+computeTargetABI(const Triple &TT, StringRef CPU,
+                 const TargetOptions &Options) {
+  if (Options.MCOptions.getABIName().startswith("aapcs"))
+    return ARMBaseTargetMachine::ARM_ABI_AAPCS;
+  if (Options.MCOptions.getABIName().startswith("apcs"))
+    return ARMBaseTargetMachine::ARM_ABI_APCS;
+
+  assert(Options.MCOptions.getABIName().empty() &&
+         "Unknown target-abi option!");
+
+  // FIXME: This is duplicated code from the front end and should be unified.
+  if (TT.isOSBinFormatMachO()) {
+    const bool WantsAAPCS = TT.getEnvironment() == llvm::Triple::EABI ||
+                            (TT.getOS() == llvm::Triple::UnknownOS &&
+                             TT.getObjectFormat() == llvm::Triple::MachO) ||
+                            CPU.startswith("cortex-m");
+    return WantsAAPCS ? ARMBaseTargetMachine::ARM_ABI_AAPCS
+                      : ARMBaseTargetMachine::ARM_ABI_APCS;
+  }
+
+  // FIXME: this is invalid for WindowsCE
+  if (TT.isOSWindows())
+    return ARMBaseTargetMachine::ARM_ABI_AAPCS;
+
+  // Select the default based on the platform.
+  switch (TT.getEnvironment()) {
+  case llvm::Triple::Android:
+  case llvm::Triple::GNUEABI:
+  case llvm::Triple::GNUEABIHF:
+  case llvm::Triple::EABIHF:
+  case llvm::Triple::EABI:
+    return ARMBaseTargetMachine::ARM_ABI_AAPCS;
+  case llvm::Triple::GNU:
+    return ARMBaseTargetMachine::ARM_ABI_APCS;
+  default:
+    break;
+  }
+
+  // Remaining environments: NetBSD gets APCS, everything else AAPCS.
+  return TT.getOS() == llvm::Triple::NetBSD
+             ? ARMBaseTargetMachine::ARM_ABI_APCS
+             : ARMBaseTargetMachine::ARM_ABI_AAPCS;
+}
+
+/// Assemble the DataLayout description string for an ARM target.
+///
+/// \p TT supplies the mangling component and the NaCl check, \p ABI selects
+/// between the APCS and non-APCS alignment rules, and \p isLittle picks the
+/// endianness prefix.
+static std::string computeDataLayout(const Triple &TT,
+                                     ARMBaseTargetMachine::ARMABI ABI,
+                                     bool isLittle) {
+  const bool IsAPCS = ABI == ARMBaseTargetMachine::ARM_ABI_APCS;
+
+  // "e" is little endian, "E" is big endian.
+  std::string Ret = isLittle ? "e" : "E";
+
+  Ret += DataLayout::getManglingComponent(TT);
+
+  // Pointers are 32 bits and aligned to 32 bits.
+  Ret += "-p:32:32";
+
+  if (IsAPCS) {
+    // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
+    // bits; we always try to align to 64 bits.
+    Ret += "-f64:32:64";
+
+    // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits; we
+    // always try to give them natural alignment.
+    Ret += "-v64:32:64-v128:32:128";
+  } else {
+    // ABIs other than APCS have 64 bit integers with natural alignment.
+    Ret += "-i64:64";
+
+    // ABIs other than APCS align 128 bit vectors to 64 bits; we always try to
+    // give them natural alignment.
+    Ret += "-v128:64:128";
+  }
+
+  // Try to align aggregates to 32 bits (the default is 64 bits, which has no
+  // particular hardware support on 32-bit ARM).
+  Ret += "-a:0:32";
+
+  // Integer registers are 32 bits.
+  Ret += "-n32";
+
+  // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
+  // aligned everywhere else.
+  Ret += TT.isOSNaCl()
+             ? "-S128"
+             : (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ? "-S64" : "-S32");
+
+  return Ret;
+}
+
/// TargetMachine ctor - Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
@@ -60,6 +165,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TargetABI(computeTargetABI(Triple(TT), CPU, Options)),
+ DL(computeDataLayout(Triple(TT), TargetABI, isLittle)),
TLOF(createTLOF(Triple(getTargetTriple()))),
Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
@@ -73,11 +180,8 @@ ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
const ARMSubtarget *
ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -91,8 +195,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
// function before we can generate a subtarget. We also need to use
// it as a key for the subtarget since that can be the only difference
// between two functions.
- Attribute SFAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
+ Attribute SFAttr = F.getFnAttribute("use-soft-float");
bool SoftFloat = !SFAttr.hasAttribute(Attribute::None)
? SFAttr.getValueAsString() == "true"
: Options.UseSoftFloat;
@@ -109,12 +212,9 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
return I.get();
}
-void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our ARM pass. This
- // allows the ARM pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createARMTargetTransformInfoPass(this));
+/// Return the TargetIRAnalysis for this target. Its callback builds an
+/// ARMTTIImpl from this target machine and the queried function and wraps it
+/// in a TargetTransformInfo.
+TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
+  auto MakeTTI = [this](Function &F) {
+    return TargetTransformInfo(ARMTTIImpl(this, F));
+  };
+  return TargetIRAnalysis(MakeTTI);
}
@@ -197,9 +297,9 @@ public:
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
- bool addPreRegAlloc() override;
- bool addPreSched2() override;
- bool addPreEmitPass() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -226,7 +326,12 @@ void ARMPassConfig::addIRPasses() {
bool ARMPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createGlobalMergePass(TM));
+ // FIXME: This is using the thumb1 only constant value for
+ // maximal global offset for merging globals. We may want
+ // to look into using the old value for non-thumb1 code of
+ // 4095 based on the TargetMachine, but this starts to become
+ // tricky when doing code gen per function.
+ addPass(createGlobalMergePass(TM, 127));
return false;
}
@@ -241,7 +346,7 @@ bool ARMPassConfig::addInstSelector() {
return false;
}
-bool ARMPassConfig::addPreRegAlloc() {
+void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
@@ -252,13 +357,11 @@ bool ARMPassConfig::addPreRegAlloc() {
getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
addPass(createA15SDOptimizerPass());
}
- return true;
}
-bool ARMPassConfig::addPreSched2() {
+void ARMPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None) {
addPass(createARMLoadStoreOptimizationPass());
- printAndVerify("After ARM load / store optimizer");
if (getARMSubtarget().hasNEON())
addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
@@ -279,11 +382,9 @@ bool ARMPassConfig::addPreSched2() {
}
if (getARMSubtarget().isThumb2())
addPass(createThumb2ITBlockPass());
-
- return true;
}
-bool ARMPassConfig::addPreEmitPass() {
+void ARMPassConfig::addPreEmitPass() {
if (getARMSubtarget().isThumb2()) {
if (!getARMSubtarget().prefers32BitThumb())
addPass(createThumb2SizeReductionPass());
@@ -294,6 +395,4 @@ bool ARMPassConfig::addPreEmitPass() {
addPass(createARMOptimizeBarriersPass());
addPass(createARMConstantIslandPass());
-
- return true;
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index fba0ec2..7f6a1ee 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -22,7 +22,15 @@
namespace llvm {
class ARMBaseTargetMachine : public LLVMTargetMachine {
+public:
+ enum ARMABI {
+ ARM_ABI_UNKNOWN,
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
protected:
+ const DataLayout DL;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
ARMSubtarget Subtarget;
bool isLittle;
@@ -39,9 +47,11 @@ public:
const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
+ const DataLayout *getDataLayout() const override { return &DL; }
+ bool isLittleEndian() const { return isLittle; }
- /// \brief Register ARM analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ /// \brief Get the TargetIRAnalysis for this target.
+ TargetIRAnalysis getTargetIRAnalysis() override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 48238bf..80f03c6 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetObjectFile.h"
-#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -27,7 +27,8 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
- bool isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
+  // TargetABI is a member of ARMBaseTargetMachine, so cast to that class.
+  // Downcasting to the ARMTargetMachine subclass would be undefined behaviour
+  // whenever TM is some other subclass of ARMBaseTargetMachine.
+  const ARMBaseTargetMachine &ARM_TM =
+      static_cast<const ARMBaseTargetMachine &>(TM);
+  bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS;
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -36,10 +37,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
}
AttributesSection =
- getContext().getELFSection(".ARM.attributes",
- ELF::SHT_ARM_ATTRIBUTES,
- 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index ec834e8..4e1b371 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===//
+//===-- ARMTargetTransformInfo.cpp - ARM specific TTI ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,17 +6,8 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements a TargetTransformInfo analysis pass specific to the
-/// ARM target machine. It uses the target's detailed information to provide
-/// more precise answers to certain TTI queries, while letting the target
-/// independent and default TTI implementations handle the rest.
-///
-//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMTargetMachine.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "ARMTargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -24,132 +15,7 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeARMTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class ARMTTI final : public ImmutablePass, public TargetTransformInfo {
- const ARMBaseTargetMachine *TM;
- const ARMSubtarget *ST;
- const ARMTargetLowering *TLI;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
- ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- ARMTTI(const ARMBaseTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeARMTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- pushTTIStack(this);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- /// \name Scalar TTI Implementations
- /// @{
- using TargetTransformInfo::getIntImmCost;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
-
- /// @}
-
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 16;
- return 0;
- }
-
- if (ST->isThumb1Only())
- return 8;
- return 13;
- }
-
- unsigned getRegisterBitWidth(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 128;
- return 0;
- }
-
- return 32;
- }
-
- unsigned getMaxInterleaveFactor() const override {
- // These are out of order CPUs:
- if (ST->isCortexA15() || ST->isSwift())
- return 2;
- return 1;
- }
-
- unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
-
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
-
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
-
- unsigned getAddressComputationCost(Type *Val,
- bool IsComplex) const override;
-
- unsigned getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue,
- OperandValueKind Op2Info = OK_AnyValue,
- OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const override;
-
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
- /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti",
- "ARM Target Transform Info", true, true, false)
-char ARMTTI::ID = 0;
-
-ImmutablePass *
-llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) {
- return new ARMTTI(TM);
-}
-
-
-unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
@@ -181,8 +47,7 @@ unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
return 3;
}
-unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
+unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -206,7 +71,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
EVT DstTy = TLI->getValueType(Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
@@ -377,11 +242,11 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return ARMIntegerConversionTbl[Idx].Cost;
}
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
- unsigned Index) const {
+unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
// Penalize inserting into an D-subregister. We end up with a three times
// lower estimated throughput on swift.
if (ST->isSwift() &&
@@ -397,11 +262,11 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
ValTy->getVectorElementType()->isIntegerTy())
return 3;
- return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
-unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
+unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON a a vector select gets lowered to vbsl.
@@ -431,10 +296,10 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return LT.first;
}
- return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -449,13 +314,32 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
return 1;
}
-unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) const {
+unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
+ // Use logic similar to that in ARMISelLowering:
+ // Any ARM CPU with VFP2 has floating point, but Thumb1 does not have access
+ // to VFP.
+
+ if (ST->hasVFP2() && !ST->isThumb1Only()) {
+ if (Ty->isFloatTy()) {
+ return TargetTransformInfo::TCC_Basic;
+ }
+
+ if (Ty->isDoubleTy()) {
+ return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
+ TargetTransformInfo::TCC_Basic;
+ }
+ }
+
+ return TargetTransformInfo::TCC_Expensive;
+}
+
+unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.
- if (Kind != SK_Reverse && Kind != SK_Alternate)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
- if (Kind == SK_Reverse) {
+ if (Kind == TTI::SK_Reverse) {
static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a
// double word (vrev) or two if we shuffle a quad word (vrev, vext).
@@ -473,11 +357,11 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
return LT.first * NEONShuffleTbl[Idx].Cost;
}
- if (Kind == SK_Alternate) {
+ if (Kind == TTI::SK_Alternate) {
static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
// Alt shuffle cost table for ARM. Cost is the number of instructions
// required to create the shuffled vector.
@@ -499,16 +383,16 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
int Idx =
CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
return LT.first * NEONAltShuffleTbl[Idx].Cost;
}
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned ARMTTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
- OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned ARMTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -564,8 +448,8 @@ unsigned ARMTTI::getArithmeticInstrCost(
if (Idx != -1)
return LT.first * CostTbl[Idx].Cost;
- unsigned Cost = TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
+ unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem that we are facing is that SROA
// creates a sequence of shift, and, or instructions to construct values.
@@ -581,8 +465,9 @@ unsigned ARMTTI::getArithmeticInstrCost(
return Cost;
}
-unsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const {
+unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
if (Src->isVectorTy() && Alignment != 16 &&
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
new file mode 100644
index 0000000..97590f6
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -0,0 +1,134 @@
+//===-- ARMTargetTransformInfo.h - ARM specific TTI -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific
+/// to the ARM target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
+ typedef BasicTTIImplBase<ARMTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const ARMSubtarget *ST;
+ const ARMTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+
+ const ARMSubtarget *getST() const { return ST; }
+ const ARMTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ ARMTTIImpl(const ARMTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ ARMTTIImpl(ARMTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ ARMTTIImpl &operator=(const ARMTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ ARMTTIImpl &operator=(ARMTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ using BaseT::getIntImmCost;
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 16;
+ return 0;
+ }
+
+ if (ST->isThumb1Only())
+ return 8;
+ return 13;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+
+ return 32;
+ }
+
+ unsigned getMaxInterleaveFactor() {
+ // These are out of order CPUs:
+ if (ST->isCortexA15() || ST->isSwift())
+ return 2;
+ return 1;
+ }
+
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp);
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+
+ unsigned getAddressComputationCost(Type *Val, bool IsComplex);
+
+ unsigned getFPOpCost(Type *Ty);
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 9cc89bd..59461e8 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -164,7 +164,10 @@ class ARMAsmParser : public MCTargetAsmParser {
// according to count of instructions in block.
// ~0U if no active IT block.
} ITState;
- bool inITBlock() { return ITState.CurPosition != ~0U;}
+ bool inITBlock() { return ITState.CurPosition != ~0U; }
+ bool lastInITBlock() {
+ return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask);
+ }
void forwardITPosition() {
if (!inITBlock()) return;
// Move to the next instruction in the IT block, if there is one. If not,
@@ -186,6 +189,11 @@ class ARMAsmParser : public MCTargetAsmParser {
return getParser().Error(L, Msg, Ranges);
}
+ bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands,
+ unsigned ListNo, bool IsARPop = false);
+ bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands,
+ unsigned ListNo);
+
int tryParseRegister();
bool tryParseRegisterWithWriteBack(OperandVector &);
int tryParseShiftRegister(OperandVector &);
@@ -305,6 +313,7 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseSetEndImm(OperandVector &);
OperandMatchResultTy parseShifterImm(OperandVector &);
OperandMatchResultTy parseRotImm(OperandVector &);
+ OperandMatchResultTy parseModImm(OperandVector &);
OperandMatchResultTy parseBitfield(OperandVector &);
OperandMatchResultTy parsePostIdxReg(OperandVector &);
OperandMatchResultTy parseAM3Offset(OperandVector &);
@@ -318,7 +327,7 @@ class ARMAsmParser : public MCTargetAsmParser {
void cvtThumbBranches(MCInst &Inst, const OperandVector &);
bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
- bool processInstruction(MCInst &Inst, const OperandVector &Ops);
+ bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out);
bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
@@ -400,6 +409,7 @@ class ARMOperand : public MCParsedAsmOperand {
k_ShiftedImmediate,
k_ShifterImmediate,
k_RotateImmediate,
+ k_ModifiedImmediate,
k_BitfieldDescriptor,
k_Token
} Kind;
@@ -511,6 +521,11 @@ class ARMOperand : public MCParsedAsmOperand {
unsigned Imm;
};
+ struct ModImmOp {
+ unsigned Bits;
+ unsigned Rot;
+ };
+
struct BitfieldOp {
unsigned LSB;
unsigned Width;
@@ -537,6 +552,7 @@ class ARMOperand : public MCParsedAsmOperand {
struct RegShiftedRegOp RegShiftedReg;
struct RegShiftedImmOp RegShiftedImm;
struct RotImmOp RotImm;
+ struct ModImmOp ModImm;
struct BitfieldOp Bitfield;
};
@@ -612,6 +628,9 @@ public:
case k_RotateImmediate:
RotImm = o.RotImm;
break;
+ case k_ModifiedImmediate:
+ ModImm = o.ModImm;
+ break;
case k_BitfieldDescriptor:
Bitfield = o.Bitfield;
break;
@@ -1020,33 +1039,17 @@ public:
}
bool isAdrLabel() const {
// If we have an immediate that's not a constant, treat it as a label
- // reference needing a fixup. If it is a constant, but it can't fit
- // into shift immediate encoding, we reject it.
- if (isImm() && !isa<MCConstantExpr>(getImm())) return true;
- else return (isARMSOImm() || isARMSOImmNeg());
- }
- bool isARMSOImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ARM_AM::getSOImmVal(Value) != -1;
- }
- bool isARMSOImmNot() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ARM_AM::getSOImmVal(~Value) != -1;
- }
- bool isARMSOImmNeg() const {
+ // reference needing a fixup.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+
+ // If it is a constant, it must fit into a modified immediate encoding.
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- // Only use this when not representable as a plain so_imm.
- return ARM_AM::getSOImmVal(Value) == -1 &&
- ARM_AM::getSOImmVal(-Value) != -1;
+ return (ARM_AM::getSOImmVal(Value) != -1 ||
+ ARM_AM::getSOImmVal(-Value) != -1);;
}
bool isT2SOImm() const {
if (!isImm()) return false;
@@ -1091,6 +1094,22 @@ public:
bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; }
bool isRotImm() const { return Kind == k_RotateImmediate; }
+ bool isModImm() const { return Kind == k_ModifiedImmediate; }
+ bool isModImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(~Value) != -1;
+ }
+ bool isModImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(Value) == -1 &&
+ ARM_AM::getSOImmVal(-Value) != -1;
+ }
bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
bool isPostIdxReg() const {
@@ -1826,6 +1845,30 @@ public:
Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3));
}
+ void addModImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ // Support for fixups (MCFixup)
+ if (isImm())
+ return addImmOperands(Inst, N);
+
+ Inst.addOperand(MCOperand::CreateImm(ModImm.Bits | (ModImm.Rot << 7)));
+ }
+
+ void addModImmNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Enc = ARM_AM::getSOImmVal(~CE->getValue());
+ Inst.addOperand(MCOperand::CreateImm(Enc));
+ }
+
+ void addModImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Enc = ARM_AM::getSOImmVal(-CE->getValue());
+ Inst.addOperand(MCOperand::CreateImm(Enc));
+ }
+
void addBitfieldOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// Munge the lsb/width into a bitfield mask.
@@ -1982,22 +2025,6 @@ public:
Inst.addOperand(MCOperand::CreateImm(Memory.OffsetImm->getValue()));
}
- void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // The operand is actually a so_imm, but we have its bitwise
- // negation in the assembly source, so twiddle it here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
- }
-
- void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // The operand is actually a so_imm, but we have its
- // negation in the assembly source, so twiddle it here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
- }
-
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
@@ -2630,6 +2657,16 @@ public:
return Op;
}
+ static std::unique_ptr<ARMOperand> CreateModImm(unsigned Bits, unsigned Rot,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ModifiedImmediate);
+ Op->ModImm.Bits = Bits;
+ Op->ModImm.Rot = Rot;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static std::unique_ptr<ARMOperand>
CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) {
auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor);
@@ -2883,6 +2920,10 @@ void ARMOperand::print(raw_ostream &OS) const {
case k_RotateImmediate:
OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
break;
+ case k_ModifiedImmediate:
+ OS << "<mod_imm #" << ModImm.Bits << ", #"
+ << ModImm.Rot << ")>";
+ break;
case k_BitfieldDescriptor:
OS << "<bitfield " << "lsb: " << Bitfield.LSB
<< ", width: " << Bitfield.Width << ">";
@@ -4339,6 +4380,123 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) {
}
ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseModImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ int64_t Imm1, Imm2;
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ // 1) A mod_imm operand can appear in the place of a register name:
+ // add r0, #mod_imm
+ // add r0, r0, #mod_imm
+ // to correctly handle the latter, we bail out as soon as we see an
+ // identifier.
+ //
+ // 2) Similarly, we do not want to parse into complex operands:
+ // mov r0, #mod_imm
+ // mov r0, :lower16:(_foo)
+ if (Parser.getTok().is(AsmToken::Identifier) ||
+ Parser.getTok().is(AsmToken::Colon))
+ return MatchOperand_NoMatch;
+
+ // Hash (dollar) is optional as per the ARMARM
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
+ // Avoid parsing into complex operands (#:)
+ if (Lexer.peekTok().is(AsmToken::Colon))
+ return MatchOperand_NoMatch;
+
+ // Eat the hash (dollar)
+ Parser.Lex();
+ }
+
+ SMLoc Sx1, Ex1;
+ Sx1 = Parser.getTok().getLoc();
+ const MCExpr *Imm1Exp;
+ if (getParser().parseExpression(Imm1Exp, Ex1)) {
+ Error(Sx1, "malformed expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm1Exp);
+
+ if (CE) {
+ // Immediate must fit within 32-bits
+ Imm1 = CE->getValue();
+ int Enc = ARM_AM::getSOImmVal(Imm1);
+ if (Enc != -1 && Parser.getTok().is(AsmToken::EndOfStatement)) {
+ // We have a match!
+ Operands.push_back(ARMOperand::CreateModImm((Enc & 0xFF),
+ (Enc & 0xF00) >> 7,
+ Sx1, Ex1));
+ return MatchOperand_Success;
+ }
+
+ // We have parsed an immediate which is not for us; fall back to a plain
+ // immediate. This can happen for instruction aliases. For example,
+ // ARMInstrInfo.td defines the alias [mov <-> mvn] which can transform
+ // a mov (mvn) with a mod_imm_neg/mod_imm_not operand into the opposite
+ // instruction with a mod_imm operand. The alias is defined such that the
+ // parser method is shared, that's why we have to do this here.
+ if (Parser.getTok().is(AsmToken::EndOfStatement)) {
+ Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1));
+ return MatchOperand_Success;
+ }
+ } else {
+ // Operands like #(l1 - l2) can only be evaluated at a later stage (via an
+ // MCFixup). Fall back to a plain immediate.
+ Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1));
+ return MatchOperand_Success;
+ }
+
+ // From this point onward, we expect the input to be a (#bits, #rot) pair
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Sx1, "expected modified immediate operand: #[0, 255], #even[0-30]");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Imm1 & ~0xFF) {
+ Error(Sx1, "immediate operand must a number in the range [0, 255]");
+ return MatchOperand_ParseFail;
+ }
+
+ // Eat the comma
+ Parser.Lex();
+
+ // Repeat for #rot
+ SMLoc Sx2, Ex2;
+ Sx2 = Parser.getTok().getLoc();
+
+ // Eat the optional hash (dollar)
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar))
+ Parser.Lex();
+
+ const MCExpr *Imm2Exp;
+ if (getParser().parseExpression(Imm2Exp, Ex2)) {
+ Error(Sx2, "malformed expression");
+ return MatchOperand_ParseFail;
+ }
+
+ CE = dyn_cast<MCConstantExpr>(Imm2Exp);
+
+ if (CE) {
+ Imm2 = CE->getValue();
+ if (!(Imm2 & ~0x1E)) {
+ // We have a match!
+ Operands.push_back(ARMOperand::CreateModImm(Imm1, Imm2, S, Ex2));
+ return MatchOperand_Success;
+ }
+ Error(Sx2, "immediate operand must an even number in the range [0, 30]");
+ return MatchOperand_ParseFail;
+ } else {
+ Error(Sx2, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+}
+
+ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseBitfield(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -5091,15 +5249,52 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
return true;
}
+ enum {
+ COFF = (1 << MCObjectFileInfo::IsCOFF),
+ ELF = (1 << MCObjectFileInfo::IsELF),
+ MACHO = (1 << MCObjectFileInfo::IsMachO)
+ };
+ static const struct PrefixEntry {
+ const char *Spelling;
+ ARMMCExpr::VariantKind VariantKind;
+ uint8_t SupportedFormats;
+ } PrefixEntries[] = {
+ { "lower16", ARMMCExpr::VK_ARM_LO16, COFF | ELF | MACHO },
+ { "upper16", ARMMCExpr::VK_ARM_HI16, COFF | ELF | MACHO },
+ };
+
StringRef IDVal = Parser.getTok().getIdentifier();
- if (IDVal == "lower16") {
- RefKind = ARMMCExpr::VK_ARM_LO16;
- } else if (IDVal == "upper16") {
- RefKind = ARMMCExpr::VK_ARM_HI16;
- } else {
+
+ const auto &Prefix =
+ std::find_if(std::begin(PrefixEntries), std::end(PrefixEntries),
+ [&IDVal](const PrefixEntry &PE) {
+ return PE.Spelling == IDVal;
+ });
+ if (Prefix == std::end(PrefixEntries)) {
Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
return true;
}
+
+ uint8_t CurrentFormat;
+ switch (getContext().getObjectFileInfo()->getObjectFileType()) {
+ case MCObjectFileInfo::IsMachO:
+ CurrentFormat = MACHO;
+ break;
+ case MCObjectFileInfo::IsELF:
+ CurrentFormat = ELF;
+ break;
+ case MCObjectFileInfo::IsCOFF:
+ CurrentFormat = COFF;
+ break;
+ }
+
+ if (~Prefix->SupportedFormats & CurrentFormat) {
+ Error(Parser.getTok().getLoc(),
+ "cannot represent relocation in the current file format");
+ return true;
+ }
+
+ RefKind = Prefix->VariantKind;
Parser.Lex();
if (getLexer().isNot(AsmToken::Colon)) {
@@ -5107,6 +5302,7 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
return true;
}
Parser.Lex(); // Eat the last ':'
+
return false;
}
@@ -5139,7 +5335,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" ||
Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" ||
Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" ||
- Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("vsel"))
+ Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
+ Mnemonic.startswith("vsel"))
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -5244,7 +5441,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
- Mnemonic == "vrintm" || Mnemonic.startswith("aes") ||
+ Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" ||
Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
(FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
// These mnemonics are never predicable
@@ -5282,7 +5479,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// conditionally adding the cc_out in the first place because we need
// to check the type of the parsed immediate operand.
if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
- !static_cast<ARMOperand &>(*Operands[4]).isARMSOImm() &&
+ !static_cast<ARMOperand &>(*Operands[4]).isModImm() &&
static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() &&
static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
return true;
@@ -5823,6 +6020,50 @@ static bool instIsBreakpoint(const MCInst &Inst) {
}
+bool ARMAsmParser::validatetLDMRegList(MCInst Inst,
+ const OperandVector &Operands,
+ unsigned ListNo, bool IsARPop) {
+ const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
+ bool HasWritebackToken = Op.isToken() && Op.getToken() == "!";
+
+ bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP);
+ bool ListContainsLR = listContainsReg(Inst, ListNo, ARM::LR);
+ bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC);
+
+ if (!IsARPop && ListContainsSP)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "SP may not be in the register list");
+ else if (ListContainsPC && ListContainsLR)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "PC and LR may not be in the register list simultaneously");
+ else if (inITBlock() && !lastInITBlock() && ListContainsPC)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "instruction must be outside of IT block or the last "
+ "instruction in an IT block");
+ return false;
+}
+
+bool ARMAsmParser::validatetSTMRegList(MCInst Inst,
+ const OperandVector &Operands,
+ unsigned ListNo) {
+ const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
+ bool HasWritebackToken = Op.isToken() && Op.getToken() == "!";
+
+ bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP);
+ bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC);
+
+ if (ListContainsSP && ListContainsPC)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "SP and PC may not be in the register list");
+ else if (ListContainsSP)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "SP may not be in the register list");
+ else if (ListContainsPC)
+ return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
+ "PC may not be in the register list");
+ return false;
+}
+
// FIXME: We would really like to be able to tablegen'erate this.
bool ARMAsmParser::validateInstruction(MCInst &Inst,
const OperandVector &Operands) {
@@ -6006,9 +6247,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(Operands[3]->getStartLoc(),
"writeback operator '!' not allowed when base register "
"in register list");
- if (listContainsReg(Inst, 3 + HasWritebackToken, ARM::SP))
- return Error(Operands[3 + HasWritebackToken]->getStartLoc(),
- "SP not allowed in register list");
+
+ if (validatetLDMRegList(Inst, Operands, 3))
+ return true;
break;
}
case ARM::LDMIA_UPD:
@@ -6025,13 +6266,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
break;
case ARM::t2LDMIA:
case ARM::t2LDMDB:
+ if (validatetLDMRegList(Inst, Operands, 3))
+ return true;
+ break;
case ARM::t2STMIA:
- case ARM::t2STMDB: {
- if (listContainsReg(Inst, 3, ARM::SP))
- return Error(Operands.back()->getStartLoc(),
- "SP not allowed in register list");
+ case ARM::t2STMDB:
+ if (validatetSTMRegList(Inst, Operands, 3))
+ return true;
break;
- }
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
case ARM::t2STMIA_UPD:
@@ -6040,9 +6282,13 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(Operands.back()->getStartLoc(),
"writeback register not allowed in register list");
- if (listContainsReg(Inst, 4, ARM::SP))
- return Error(Operands.back()->getStartLoc(),
- "SP not allowed in register list");
+ if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
+ if (validatetLDMRegList(Inst, Operands, 3))
+ return true;
+ } else {
+ if (validatetSTMRegList(Inst, Operands, 3))
+ return true;
+ }
break;
}
case ARM::sysLDMIA_UPD:
@@ -6087,6 +6333,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or pc");
+ if (validatetLDMRegList(Inst, Operands, 2, !isMClass()))
+ return true;
break;
}
case ARM::tPUSH: {
@@ -6095,6 +6343,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or lr");
+ if (validatetSTMRegList(Inst, Operands, 2))
+ return true;
break;
}
case ARM::tSTMIA_UPD: {
@@ -6111,9 +6361,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(Operands[4]->getStartLoc(),
"writeback operator '!' not allowed when base register "
"in register list");
- if (listContainsReg(Inst, 4, ARM::SP) && !inITBlock())
- return Error(Operands.back()->getStartLoc(),
- "SP not allowed in register list");
+
+ if (validatetSTMRegList(Inst, Operands, 4))
+ return true;
break;
}
case ARM::tADDrSP: {
@@ -6434,7 +6684,8 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
}
bool ARMAsmParser::processInstruction(MCInst &Inst,
- const OperandVector &Operands) {
+ const OperandVector &Operands,
+ MCStreamer &Out) {
switch (Inst.getOpcode()) {
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
@@ -6475,12 +6726,35 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// Alias for alternate form of 'ADR Rd, #imm' instruction.
case ARM::ADDri: {
if (Inst.getOperand(1).getReg() != ARM::PC ||
- Inst.getOperand(5).getReg() != 0)
+ Inst.getOperand(5).getReg() != 0 ||
+ !(Inst.getOperand(2).isExpr() || Inst.getOperand(2).isImm()))
return false;
MCInst TmpInst;
TmpInst.setOpcode(ARM::ADR);
TmpInst.addOperand(Inst.getOperand(0));
- TmpInst.addOperand(Inst.getOperand(2));
+ if (Inst.getOperand(2).isImm()) {
+ // Immediate (mod_imm) will be in its encoded form; we must decode it
+ // before passing it to the ADR instruction.
+ unsigned Enc = Inst.getOperand(2).getImm();
+ TmpInst.addOperand(MCOperand::CreateImm(
+ ARM_AM::rotr32(Enc & 0xFF, (Enc & 0xF00) >> 7)));
+ } else {
+ // Turn PC-relative expression into absolute expression.
+ // Reading PC provides the start of the current instruction + 8 and
+ // the transform to adr is biased by that.
+ MCSymbol *Dot = getContext().CreateTempSymbol();
+ Out.EmitLabel(Dot);
+ const MCExpr *OpExpr = Inst.getOperand(2).getExpr();
+ const MCExpr *InstPC = MCSymbolRefExpr::Create(Dot,
+ MCSymbolRefExpr::VK_None,
+ getContext());
+ const MCExpr *Const8 = MCConstantExpr::Create(8, getContext());
+ const MCExpr *ReadPC = MCBinaryExpr::CreateAdd(InstPC, Const8,
+ getContext());
+ const MCExpr *FixupAddr = MCBinaryExpr::CreateAdd(ReadPC, OpExpr,
+ getContext());
+ TmpInst.addOperand(MCOperand::CreateExpr(FixupAddr));
+ }
TmpInst.addOperand(Inst.getOperand(3));
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
@@ -8302,7 +8576,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
- default: break;
case Match_Success:
// Context sensitive operand constraints aren't handled by the matcher,
// so check them here.
@@ -8320,7 +8593,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other. E.g.,
// tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
- while (processInstruction(Inst, Operands))
+ while (processInstruction(Inst, Operands, Out))
;
// Only after the instruction is fully processed, we can validate it
@@ -8732,7 +9005,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Consume the EndOfStatement
- if (!RegisterReqs.insert(std::make_pair(Name, Reg)).second) {
+ if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg) {
Error(SRegLoc, "redefinition of '" + Name + "' does not match original.");
return false;
}
@@ -8858,8 +9131,13 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
if (Tag == ARMBuildAttrs::compatibility) {
if (Parser.getTok().isNot(AsmToken::Comma))
IsStringValue = false;
- else
- Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "comma expected");
+ Parser.eatToEndOfStatement();
+ return false;
+ } else {
+ Parser.Lex();
+ }
}
if (IsStringValue) {
@@ -8888,38 +9166,78 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
StringRef CPU = getParser().parseStringToEndOfStatement().trim();
getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
+
+ if (!STI.isCPUStringValid(CPU)) {
+ Error(L, "Unknown CPU name");
+ return false;
+ }
+
+ // FIXME: This switches the CPU features globally, therefore it might
+ // happen that code you would not expect to assemble will. For details
+ // see: http://llvm.org/bugs/show_bug.cgi?id=20757
+ STI.InitMCProcessorInfo(CPU, "");
+ STI.InitCPUSchedModel(CPU);
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
return false;
}
// FIXME: This is duplicated in getARMFPUFeatures() in
// tools/clang/lib/Driver/Tools.cpp
static const struct {
- const unsigned Fpu;
+ const unsigned ID;
const uint64_t Enabled;
const uint64_t Disabled;
-} Fpus[] = {
- {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
- {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
- {ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON},
- {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON},
- {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON},
- {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON},
- {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16,
- ARM::FeatureNEON | ARM::FeatureCrypto},
- {ARM::FP_ARMV8, ARM::FeatureFPARMv8,
- ARM::FeatureNEON | ARM::FeatureCrypto},
- {ARM::NEON, ARM::FeatureNEON, 0},
- {ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0},
- {ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON,
- ARM::FeatureCrypto},
- {ARM::CRYPTO_NEON_FP_ARMV8,
- ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0},
- {ARM::SOFTVFP, 0, 0},
+} FPUs[] = {
+ {/* ID */ ARM::VFP,
+ /* Enabled */ ARM::FeatureVFP2,
+ /* Disabled */ ARM::FeatureNEON},
+ {/* ID */ ARM::VFPV2,
+ /* Enabled */ ARM::FeatureVFP2,
+ /* Disabled */ ARM::FeatureNEON},
+ {/* ID */ ARM::VFPV3,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3,
+ /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16},
+ {/* ID */ ARM::VFPV3_D16,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16,
+ /* Disabled */ ARM::FeatureNEON},
+ {/* ID */ ARM::VFPV4,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4,
+ /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16},
+ {/* ID */ ARM::VFPV4_D16,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+ ARM::FeatureD16,
+ /* Disabled */ ARM::FeatureNEON},
+ {/* ID */ ARM::FPV5_D16,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+ ARM::FeatureFPARMv8 | ARM::FeatureD16,
+ /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto},
+ {/* ID */ ARM::FP_ARMV8,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+ ARM::FeatureFPARMv8,
+ /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto | ARM::FeatureD16},
+ {/* ID */ ARM::NEON,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON,
+ /* Disabled */ ARM::FeatureD16},
+ {/* ID */ ARM::NEON_VFPV4,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+ ARM::FeatureNEON,
+ /* Disabled */ ARM::FeatureD16},
+ {/* ID */ ARM::NEON_FP_ARMV8,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+ ARM::FeatureFPARMv8 | ARM::FeatureNEON,
+ /* Disabled */ ARM::FeatureCrypto | ARM::FeatureD16},
+ {/* ID */ ARM::CRYPTO_NEON_FP_ARMV8,
+ /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+ ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto,
+ /* Disabled */ ARM::FeatureD16},
+ {ARM::SOFTVFP, 0, 0},
};
/// parseDirectiveFPU
/// ::= .fpu str
bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
+ SMLoc FPUNameLoc = getTok().getLoc();
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
unsigned ID = StringSwitch<unsigned>(FPU)
@@ -8928,18 +9246,18 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
.Default(ARM::INVALID_FPU);
if (ID == ARM::INVALID_FPU) {
- Error(L, "Unknown FPU name");
+ Error(FPUNameLoc, "Unknown FPU name");
return false;
}
- for (const auto &Fpu : Fpus) {
- if (Fpu.Fpu != ID)
+ for (const auto &Entry : FPUs) {
+ if (Entry.ID != ID)
continue;
// Need to toggle features that should be on but are off and that
// should be off but are on.
- uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) |
- (Fpu.Disabled & STI.getFeatureBits());
+ uint64_t Toggle = (Entry.Enabled & ~STI.getFeatureBits()) |
+ (Entry.Disabled & STI.getFeatureBits());
setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle)));
break;
}
@@ -9766,7 +10084,7 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
if (CE->getValue() == 0)
return Match_Success;
break;
- case MCK_ARMSOImm:
+ case MCK_ModImm:
if (Op.isImm()) {
const MCExpr *SOExpr = Op.getImm();
int64_t Value;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ef65418..4d5122a 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -176,8 +176,6 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
@@ -405,6 +403,28 @@ static MCDisassembler *createThumbDisassembler(const Target &T,
return new ThumbDisassembler(STI, Ctx);
}
+// Post-decoding checks: may downgrade Result based on the raw encoding Insn.
+static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS,
+ uint32_t Insn,
+ DecodeStatus Result)
+{
+ switch (MI.getOpcode()) {
+ case ARM::HVC: {
+ // HVC is undefined if condition == 0xf (Fail), otherwise unpredictable
+ // if condition != 0xe (SoftFail); the condition is Insn bits [31:28].
+ uint32_t Cond = (Insn >> 28) & 0xF;
+ if (Cond == 0xF)
+ return MCDisassembler::Fail;
+ if (Cond != 0xE)
+ return MCDisassembler::SoftFail;
+ return Result;
+ }
+ default: return Result;
+ }
+}
+
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address, raw_ostream &OS,
@@ -430,7 +450,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTableARM32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- return Result;
+ return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
}
// VFP and NEON instructions, similarly, are shared between ARM
@@ -1113,15 +1133,6 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
- uint32_t imm = Val & 0xFF;
- uint32_t rot = (Val & 0xF00) >> 7;
- uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F));
- Inst.addOperand(MCOperand::CreateImm(rot_imm));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4960,7 +4971,7 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
DecodeStatus S = MCDisassembler::Success;
// Shift of "asr #32" is not allowed in Thumb2 mode.
- if (Val == 0x20) S = MCDisassembler::SoftFail;
+ if (Val == 0x20) S = MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(Val));
return S;
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 0570084..16eea33 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -269,7 +269,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
case ARM::LDREXD: case ARM::STREXD:
- case ARM::LDAEXD: case ARM::STLEXD:
+ case ARM::LDAEXD: case ARM::STLEXD: {
const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
@@ -290,6 +290,23 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printInstruction(&NewMI, O);
return;
}
+ break;
+ }
+ // B9.3.3 ERET (Thumb)
+ // For a target that has Virtualization Extensions, ERET is the preferred
+ // disassembly of SUBS PC, LR, #0
+ case ARM::t2SUBS_PC_LR: {
+ if (MI->getNumOperands() == 3 &&
+ MI->getOperand(0).isImm() &&
+ MI->getOperand(0).getImm() == 0 &&
+ (getAvailableFeatures() & ARM::FeatureVirtualization)) {
+ O << "\teret";
+ printPredicateOperand(MI, 1, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ break;
+ }
}
printInstruction(MI, O);
@@ -1301,6 +1318,52 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
O << markup(">");
}
+void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ MCOperand Op = MI->getOperand(OpNum);
+
+ // Expression operands (pending MCFixups) are printed by printOperand.
+ if (Op.isExpr())
+ return printOperand(MI, OpNum, O);
+
+ unsigned Bits = Op.getImm() & 0xFF;
+ unsigned Rot = (Op.getImm() & 0xF00) >> 7;
+
+ bool PrintUnsigned = false;
+ switch (MI->getOpcode()){
+ case ARM::MOVi:
+ // Moves to PC are printed as an unsigned value
+ PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC);
+ break;
+ case ARM::MSRi:
+ // Moves to special registers are printed as an unsigned value
+ PrintUnsigned = true;
+ break;
+ }
+
+ int32_t Rotated = ARM_AM::rotr32(Bits, Rot);
+ if (ARM_AM::getSOImmVal(Rotated) == Op.getImm()) {
+ // #rot has the least possible value: print the rotated value as one #imm
+ O << "#" << markup("<imm:");
+ if (PrintUnsigned)
+ O << static_cast<uint32_t>(Rotated);
+ else
+ O << Rotated;
+ O << markup(">");
+ return;
+ }
+
+ // Otherwise print both fields explicitly, as "#bits, #rot"
+ O << "#"
+ << markup("<imm:")
+ << Bits
+ << markup(">")
+ << ", #"
+ << markup("<imm:")
+ << Rot
+ << markup(">");
+}
+
void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << markup("<imm:")
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 09fd536..f179e01 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -131,6 +131,7 @@ public:
void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index f24b419..a821a6b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -51,7 +51,7 @@ ARMELFObjectWriter::~ARMELFObjectWriter() {}
bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const {
- // FIXME: This is extremelly conservative. This really needs to use a
+ // FIXME: This is extremely conservative. This really needs to use a
// whitelist with a clear explanation for why each relocation needs to
// point to the symbol, not to the section.
switch (Type) {
@@ -148,6 +148,22 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
} else {
switch ((unsigned)Fixup.getKind()) {
default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_1:
+ switch (Modifier) {
+ default: llvm_unreachable("unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS8;
+ break;
+ }
+ break;
+ case FK_Data_2:
+ switch (Modifier) {
+ default: llvm_unreachable("unsupported modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS16;
+ break;
+ }
+ break;
case FK_Data_4:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
@@ -184,6 +200,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_PREL31:
Type = ELF::R_ARM_PREL31;
break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+ Type = ELF::R_ARM_SBREL32;
+ break;
case MCSymbolRefExpr::VK_ARM_TLSLDO:
Type = ELF::R_ARM_TLS_LDO32;
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 24ee537..2b65520 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -15,6 +15,7 @@
#include "ARMArchName.h"
#include "ARMFPUName.h"
+#include "ARMArchExtName.h"
#include "ARMRegisterInfo.h"
#include "ARMUnwindOpAsm.h"
#include "llvm/ADT/StringExtras.h"
@@ -105,6 +106,19 @@ static unsigned GetArchDefaultCPUArch(unsigned ID) {
return 0;
}
+static const char *GetArchExtName(unsigned ID) {
+ switch (ID) {
+ default:
+ llvm_unreachable("Unknown ARCH Extension kind");
+ break;
+#define ARM_ARCHEXT_NAME(NAME, ID) \
+ case ARM::ID: \
+ return NAME;
+#include "ARMArchExtName.def"
+ }
+ return nullptr;
+}
+
namespace {
class ARMELFStreamer;
@@ -134,6 +148,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
StringRef StrinValue) override;
void emitArch(unsigned Arch) override;
+ void emitArchExtension(unsigned ArchExt) override;
void emitObjectArch(unsigned Arch) override;
void emitFPU(unsigned FPU) override;
void emitInst(uint32_t Inst, char Suffix = '\0') override;
@@ -249,6 +264,9 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
void ARMTargetAsmStreamer::emitArch(unsigned Arch) {
OS << "\t.arch\t" << GetArchName(Arch) << "\n";
}
+void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
+ OS << "\t.arch_extension\t" << GetArchExtName(ArchExt) << "\n";
+}
void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) {
OS << "\t.object_arch\t" << GetArchName(Arch) << '\n';
}
@@ -300,7 +318,19 @@ private:
StringRef StringValue;
static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) {
- return (LHS.Tag < RHS.Tag);
+ // The conformance tag must be emitted first when serialised
+ // into an object file. Specifically, the addenda to the ARM ABI
+ // states that (2.3.7.4):
+ //
+ // "To simplify recognition by consumers in the common case of
+ // claiming conformity for the whole file, this tag should be
+ // emitted first in a file-scope sub-subsection of the first
+ // public subsection of the attributes section."
+ //
+ // So it is special-cased in this comparison predicate when the
+ // attributes are sorted in finishAttributeSection().
+ return (RHS.Tag != ARMBuildAttrs::conformance) &&
+ ((LHS.Tag == ARMBuildAttrs::conformance) || (LHS.Tag < RHS.Tag));
}
};
@@ -541,6 +571,10 @@ public:
/// necessary.
void EmitValueImpl(const MCExpr *Value, unsigned Size,
const SMLoc &Loc) override {
+ if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4))
+ getContext().FatalError(Loc, "relocated expression must be 32-bit");
+
EmitDataMappingSymbol();
MCELFStreamer::EmitValueImpl(Value, Size);
}
@@ -942,11 +976,8 @@ void ARMTargetELFStreamer::finishAttributeSection() {
if (AttributeSection) {
Streamer.SwitchSection(AttributeSection);
} else {
- AttributeSection =
- Streamer.getContext().getELFSection(".ARM.attributes",
- ELF::SHT_ARM_ATTRIBUTES,
- 0,
- SectionKind::getMetadata());
+ AttributeSection = Streamer.getContext().getELFSection(
+ ".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
Streamer.SwitchSection(AttributeSection);
// Format version
@@ -979,12 +1010,12 @@ void ARMTargetELFStreamer::finishAttributeSection() {
Streamer.EmitULEB128IntValue(item.IntValue);
break;
case AttributeItem::TextAttribute:
- Streamer.EmitBytes(item.StringValue.upper());
+ Streamer.EmitBytes(item.StringValue);
Streamer.EmitIntValue(0, 1); // '\0'
break;
case AttributeItem::NumericAndTextAttributes:
Streamer.EmitULEB128IntValue(item.IntValue);
- Streamer.EmitBytes(item.StringValue.upper());
+ Streamer.EmitBytes(item.StringValue);
Streamer.EmitIntValue(0, 1); // '\0'
break;
}
@@ -1053,11 +1084,11 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
// Get .ARM.extab or .ARM.exidx section
const MCSectionELF *EHSection = nullptr;
if (const MCSymbol *Group = FnSection.getGroup()) {
- EHSection = getContext().getELFSection(
- EHSecName, Type, Flags | ELF::SHF_GROUP, Kind,
- FnSection.getEntrySize(), Group->getName());
+ EHSection =
+ getContext().getELFSection(EHSecName, Type, Flags | ELF::SHF_GROUP,
+ FnSection.getEntrySize(), Group->getName());
} else {
- EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
+ EHSection = getContext().getELFSection(EHSecName, Type, Flags);
}
assert(EHSection && "Failed to get the required EH section");
@@ -1341,10 +1372,8 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
return S;
}
-MCStreamer *createARMNullStreamer(MCContext &Ctx) {
- MCStreamer *S = llvm::createNullStreamer(Ctx);
- new ARMTargetStreamer(*S);
- return S;
+MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
+ return new ARMTargetStreamer(S);
}
MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 1d82099..66a1618 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "ARMMCAsmInfo.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -89,6 +89,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
AlignmentIsInBytes = false;
PrivateGlobalPrefix = "$M";
+ PrivateLabelPrefix = "$M";
}
void ARMCOFFMCAsmInfoGNU::anchor() { }
@@ -101,6 +102,7 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::None;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index f1fef41..6cb4715 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -21,7 +21,8 @@
namespace llvm {
class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
- void anchor() override;
+ virtual void anchor();
+
public:
explicit ARMMCAsmInfoDarwin(StringRef TT);
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index b8ee555..efbebd3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -37,8 +37,8 @@ STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
namespace {
class ARMMCCodeEmitter : public MCCodeEmitter {
- ARMMCCodeEmitter(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete;
+ void operator=(const ARMMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
@@ -304,6 +304,28 @@ public:
return Binary;
}
+ unsigned getModImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &ST) const {
+ const MCOperand &MO = MI.getOperand(Op);
+
+ // Expression operand: record an MCFixup for it and return 0.
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ // In the instruction this value is always encoded in the lowest 12 bits,
+ // so we don't have to perform any specific adjustments.
+ // Due to requirements of relocatable records we have to use FK_Data_4.
+ // See ARMELFObjectWriter::ExplicitRelSym and
+ // ARMELFObjectWriter::GetRelocTypeInner for more details.
+ MCFixupKind Kind = MCFixupKind(FK_Data_4);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ // The immediate is already in its encoded format; return it unchanged.
+ return MO.getImm();
+ }
+
/// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 98190ba..8c19785 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -64,10 +64,60 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
}
static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
- std::string &Info) {
- if (STI.getFeatureBits() & llvm::ARM::HasV8Ops &&
- MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) {
- Info = "applying IT instruction to more than one subsequent instruction is deprecated";
+ std::string &Info) {
+ if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && MI.getOperand(1).isImm() &&
+ MI.getOperand(1).getImm() != 8) {
+ Info = "applying IT instruction to more than one subsequent instruction is "
+ "deprecated";
+ return true;
+ }
+
+ return false;
+}
+
+static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) &&
+ "cannot predicate thumb instructions");
+
+ assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
+ for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
+ assert(MI.getOperand(OI).isReg() && "expected register");
+ if (MI.getOperand(OI).getReg() == ARM::SP ||
+ MI.getOperand(OI).getReg() == ARM::PC) {
+ Info = "use of SP or PC in the list is deprecated";
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) &&
+ "cannot predicate thumb instructions");
+
+ assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
+ bool ListContainsPC = false, ListContainsLR = false;
+ for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
+ assert(MI.getOperand(OI).isReg() && "expected register");
+ switch (MI.getOperand(OI).getReg()) {
+ default:
+ break;
+ case ARM::LR:
+ ListContainsLR = true;
+ break;
+ case ARM::PC:
+ ListContainsPC = true;
+ break;
+ case ARM::SP:
+ Info = "use of SP in the list is deprecated";
+ return true;
+ }
+ }
+
+ if (ListContainsPC && ListContainsLR) {
+ Info = "use of LR and PC simultaneously in the list is deprecated";
return true;
}
@@ -405,11 +455,15 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer);
TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer);
- // Register the null streamer.
- TargetRegistry::RegisterNullStreamer(TheARMLETarget, createARMNullStreamer);
- TargetRegistry::RegisterNullStreamer(TheARMBETarget, createARMNullStreamer);
- TargetRegistry::RegisterNullStreamer(TheThumbLETarget, createARMNullStreamer);
- TargetRegistry::RegisterNullStreamer(TheThumbBETarget, createARMNullStreamer);
+ // Register the null TargetStreamer.
+ TargetRegistry::RegisterNullTargetStreamer(TheARMLETarget,
+ createARMNullTargetStreamer);
+ TargetRegistry::RegisterNullTargetStreamer(TheARMBETarget,
+ createARMNullTargetStreamer);
+ TargetRegistry::RegisterNullTargetStreamer(TheThumbLETarget,
+ createARMNullTargetStreamer);
+ TargetRegistry::RegisterNullTargetStreamer(TheThumbBETarget,
+ createARMNullTargetStreamer);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index a6c20d5..c17e959 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -29,6 +29,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class MCStreamer;
class MCRelocationInfo;
+class MCTargetStreamer;
class StringRef;
class Target;
class raw_ostream;
@@ -51,7 +52,7 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst);
-MCStreamer *createARMNullStreamer(MCContext &Ctx);
+MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S);
MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 7da5003..3187d36 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -54,10 +54,10 @@ public:
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}
- void RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) override;
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) override;
};
}
@@ -232,7 +232,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
MachO::any_relocation_info MRE;
@@ -243,7 +243,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
@@ -297,7 +297,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
MachO::any_relocation_info MRE;
@@ -307,7 +307,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
@@ -351,11 +351,10 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
}
void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
+ MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
+ const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
@@ -401,8 +400,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = 0;
+ const MCSymbolData *RelSymbol = nullptr;
if (Target.isAbsolute()) { // constant
// FIXME!
@@ -422,8 +421,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// Check whether we need an external or internal relocation.
if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
FixedValue)) {
- IsExtern = 1;
- Index = SD->getIndex();
+ RelSymbol = SD;
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
@@ -447,11 +445,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
@@ -476,10 +471,10 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(MachO::ARM_RELOC_PAIR << 28));
- Writer->addRelocation(Fragment->getParent(), MREPair);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MREPair);
}
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 8acd7af..b680db5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -63,6 +63,7 @@ void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {}
void ARMTargetStreamer::emitArch(unsigned Arch) {}
+void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {}
void ARMTargetStreamer::emitObjectArch(unsigned Arch) {}
void ARMTargetStreamer::emitFPU(unsigned FPU) {}
void ARMTargetStreamer::finishAttributeSection() {}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index d31f1f4..2fd6445 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -8,7 +8,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/COFF.h"
@@ -26,14 +29,16 @@ public:
virtual ~ARMWinCOFFObjectWriter() { }
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsCrossSection) const override;
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const override;
bool recordRelocation(const MCFixup &) const override;
};
unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
const MCFixup &Fixup,
- bool IsCrossSection) const {
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const {
assert(getMachine() == COFF::IMAGE_FILE_MACHINE_ARMNT &&
"AArch64 support not yet implemented");
@@ -41,7 +46,10 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
switch (static_cast<unsigned>(Fixup.getKind())) {
- default: llvm_unreachable("unsupported relocation type");
+ default: {
+ const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
+ report_fatal_error(Twine("unsupported relocation type: ") + Info.Name);
+ }
case FK_Data_4:
switch (Modifier) {
case MCSymbolRefExpr::VK_COFF_IMGREL32:
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 35fe9b3..51e519d 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -381,7 +381,7 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
- const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
isLikeA9 = STI->isLikeA9() || STI->isSwift();
isSwift = STI->isSwift();
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 6deab4f..7dcc64e 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -52,9 +52,9 @@ void Thumb1FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
- const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -89,15 +89,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
+ unsigned Align = STI.getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
assert(NumBytes >= ArgRegsSaveSize &&
@@ -124,7 +121,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
if (!AFI->hasStackFrame()) {
@@ -135,7 +133,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
return;
}
@@ -199,7 +198,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
E = CSI.end(); I != E; ++I) {
@@ -226,7 +226,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
}
}
@@ -244,13 +245,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
if (NumBytes > 508)
// If offset is > 508 then sp cannot be adjusted in a single instruction,
@@ -267,7 +270,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -324,15 +328,12 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
+ unsigned Align = STI.getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
@@ -459,8 +460,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
DebugLoc DL;
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -499,7 +499,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI->getDebugLoc();
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 8ea912e..c24f740 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -44,7 +44,7 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
// Need to check the arch.
MachineFunction &MF = *MBB.getParent();
- const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget &st = MF.getSubtarget<ARMSubtarget>();
assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
"Thumb1 can only copy GPR registers");
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index c10c809..5e2cbdc 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -71,7 +71,7 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
"Thumb1 does not have ldr to high register");
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
@@ -234,7 +234,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
// If we would emit the copy with an immediate of 0, just use tMOVr.
if (CopyOpc && Bytes < CopyScale) {
CopyOpc = ARM::tMOVr;
- CopyBits = 0;
CopyScale = 1;
CopyNeedsCC = false;
CopyRange = 0;
@@ -389,12 +388,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo *>(MI.getParent()
- ->getParent()
- ->getTarget()
- .getSubtargetImpl()
- ->getInstrInfo());
+ const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -420,7 +414,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// off the frame pointer (if, for example, there are alloca() calls in
// the function, the offset will be negative. Use R12 instead since that's
// a call clobbered register that we know won't be used in Thumb1 mode.
- const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL;
AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
.addReg(ARM::R12, RegState::Define)
@@ -466,8 +460,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
@@ -478,8 +471,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MF.getFrameInfo()->getStackSize() + SPAdj;
if (MF.getFrameInfo()->hasVarSizedObjects()) {
- assert(SPAdj == 0 && MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
- "Unexpected");
+ assert(SPAdj == 0 && STI.getFrameLowering()->hasFP(MF) && "Unexpected");
// There are alloca()'s in this function, must reference off the frame
// pointer or base pointer instead.
if (!hasBasePointer(MF)) {
@@ -495,10 +487,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// when !hasReservedCallFrame().
#ifndef NDEBUG
if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
- assert(MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->hasReservedCallFrame(MF) &&
+ assert(STI.getFrameLowering()->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index fdcb522..b657f2d 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -253,12 +253,12 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
}
bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
- const TargetMachine &TM = Fn.getTarget();
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(Fn.getSubtarget());
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = static_cast<const Thumb2InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
- restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT();
+ TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+ TRI = STI.getRegisterInfo();
+ restrictIT = STI.restrictIT();
if (!AFI->isThumbFunction())
return false;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 91973e1..62c3752 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -574,13 +574,10 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
}
} else if (AddrMode == ARMII::AddrModeT2_i8s4) {
Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
- NumBits = 8;
- // MCInst operand has already scaled value.
+ NumBits = 10; // 8 bits scaled by 4
+ // MCInst operand expects already scaled value.
Scale = 1;
- if (Offset < 0) {
- isSub = true;
- Offset = -Offset;
- }
+ assert((Offset & 3) == 0 && "Can't encode this offset!");
} else {
llvm_unreachable("Unsupported addressing mode!");
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index c51eb8b..2ee908b 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -1001,17 +1001,12 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
}
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- TII = static_cast<const Thumb2InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- STI = &TM.getSubtarget<ARMSubtarget>();
+ STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
// Optimizing / minimizing size?
- AttributeSet FnAttrs = MF.getFunction()->getAttributes();
- OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
- MinimizeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ OptimizeSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ MinimizeSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());