Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/ARM.h | 4
-rw-r--r--  lib/Target/ARM/ARM.td | 57
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 754
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h | 42
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 654
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 20
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 501
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 29
-rw-r--r--  lib/Target/ARM/ARMCallingConv.h | 2
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 2
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 30
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 18
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 29
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 11
-rw-r--r--  lib/Target/ARM/ARMELFWriterInfo.cpp | 78
-rw-r--r--  lib/Target/ARM/ARMELFWriterInfo.h | 59
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 71
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 257
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 51
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 324
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 681
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 46
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 21
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 62
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 126
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 168
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 5
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 67
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 4
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMJITInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 30
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 16
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 23
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 2
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 597
-rw-r--r--  lib/Target/ARM/ARMScheduleSwift.td | 1085
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 27
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 26
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 35
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp | 15
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.h | 5
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 124
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 138
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 350
-rw-r--r--  lib/Target/ARM/AsmParser/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 3
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 76
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 626
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 4
-rwxr-xr-x  lib/Target/ARM/LICENSE.TXT | 47
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 128
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 20
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 201
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h | 27
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 14
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 5
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 16
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 54
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 82
-rw-r--r--  lib/Target/ARM/Makefile | 2
-rw-r--r--  lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 4
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 21
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 260
76 files changed, 5782 insertions(+), 2511 deletions(-)
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 2a1e8e4..5faf8c3 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -37,12 +37,16 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalBaseRegPass();
FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
+/// \brief Creates an ARM-specific Target Transformation Info pass.
+ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
+
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
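(Aside: the new ImmutablePass factory declared above is typically wired in from the target machine. The sketch below assumes the LLVM 3.3-era addAnalysisPasses hook and the createBasicTargetTransformInfoPass helper; neither appears in this hunk.)

// In ARMTargetMachine.cpp (sketch): register the generic TTI implementation
// first, so the ARM-specific pass can delegate queries it does not handle.
void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
  PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
  PM.add(createARMTargetTransformInfoPass(this));
}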
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 69e2346..a76715a 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -32,9 +32,6 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
"Enable VFP3 instructions",
[FeatureVFP2]>;
-def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
- "Enable VFP4 instructions",
- [FeatureVFP3]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
@@ -44,10 +41,16 @@ def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
"Does not support ARM mode execution">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3, FeatureFP16]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
"Restrict VFP3 to 16 double registers">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
+def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
+ "HasHardwareDivideInARM", "true",
+ "Enable divide instructions in ARM mode">;
def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
@@ -86,6 +89,10 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
+ "AvoidMOVsShifterOperand", "true",
+ "Avoid movs instructions with shifter operand">;
+
// Some processors perform return stack prediction. CodeGen should avoid issue
// "normal" call instructions to callees which do not return.
def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
@@ -129,6 +136,11 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
include "ARMSchedule.td"
// ARM processor families.
+def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
+ "Cortex-A5 ARM processors",
+ [FeatureSlowFPBrcc, FeatureNEONForFP,
+ FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
[FeatureSlowFPBrcc, FeatureNEONForFP,
@@ -139,6 +151,25 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
[FeatureVMLxForwarding,
FeatureT2XtPk, FeatureFP16,
FeatureAvoidPartialCPSR]>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+ "Swift ARM processors",
+ [FeatureNEONForFP, FeatureT2XtPk,
+ FeatureVFP4, FeatureMP, FeatureHWDiv,
+ FeatureHWDivARM, FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx]>;
+
+// FIXME: It has not been determined if A15 has these features.
+def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
+ "Cortex-A15 ARM processors",
+ [FeatureT2XtPk, FeatureFP16,
+ FeatureAvoidPartialCPSR]>;
+def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
+ "Cortex-R5 ARM processors",
+ [FeatureSlowFPBrcc, FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
@@ -204,6 +235,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
FeatureDSPThumb2]>;
// V7a Processors.
+// FIXME: A5 currently uses the same schedule model as A8.
+def : ProcessorModel<"cortex-a5", CortexA8Model,
+ [ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureVFP4, FeatureDSPThumb2,
+ FeatureHasRAS]>;
def : ProcessorModel<"cortex-a8", CortexA8Model,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
@@ -214,6 +250,15 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureMP,
FeatureHasRAS]>;
+// FIXME: A15 currently uses the same ProcessorModel as A9.
+def : ProcessorModel<"cortex-a15", CortexA9Model,
+ [ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureHasRAS]>;
+// FIXME: R5 currently uses the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r5", CortexA8Model,
+ [ProcR5, HasV7Ops, FeatureDB,
+ FeatureVFP3, FeatureDSPThumb2,
+ FeatureHasRAS]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
@@ -227,6 +272,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureMClass]>;
+// Swift uArch Processors.
+def : ProcessorModel<"swift", SwiftModel,
+ [ProcSwift, HasV7Ops, FeatureNEON,
+ FeatureDB, FeatureDSPThumb2,
+ FeatureHasRAS]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
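(Aside: each SubtargetFeature def above becomes a boolean member of ARMSubtarget — named by the def's second argument, e.g. HasHardwareDivideInARM — and is set by the generated ParseSubtargetFeatures(). The sketch below shows how codegen then queries it; the accessor name hasDivideInARMMode() and the helper are assumptions for illustration, not part of this patch.)

#include "ARMSubtarget.h"

// Sketch: true only when FeatureHWDivARM ("hwdiv-arm") is enabled, i.e. on
// Swift and Cortex-R5 per the processor definitions above.
static bool canUseUDIVInARMMode(const llvm::ARMSubtarget &ST) {
  return ST.hasDivideInARMMode() && !ST.isThumb();
}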
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index e9e2803..fc6ac90 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -23,31 +23,33 @@
#include "InstPrinter/ARMInstPrinter.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
@@ -302,7 +304,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
}
void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
- uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
@@ -389,16 +391,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
//===--------------------------------------------------------------------===//
MCSymbol *ARMAsmPrinter::
-GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
- const MachineBasicBlock *MBB) const {
- SmallString<60> Name;
- raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
- << getFunctionNumber() << '_' << uid << '_' << uid2
- << "_set_" << MBB->getNumber();
- return OutContext.GetOrCreateSymbol(Name.str());
-}
-
-MCSymbol *ARMAsmPrinter::
GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI"
@@ -407,7 +399,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
}
-MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
@@ -592,9 +584,24 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(
getObjFileLowering());
- OutStreamer.SwitchSection(TLOFMacho.getTextSection());
- OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
- OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection());
+
+ // Collect the set of sections our functions will go into.
+ SetVector<const MCSection *, SmallVector<const MCSection *, 8>,
+ SmallPtrSet<const MCSection *, 8> > TextSections;
+ // Default text section comes first.
+ TextSections.insert(TLOFMacho.getTextSection());
+ // Now any user defined text sections from function attributes.
+ for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F)
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
+ TextSections.insert(TLOFMacho.SectionForGlobal(F, Mang, TM));
+ // Now the coalescable sections.
+ TextSections.insert(TLOFMacho.getTextCoalSection());
+ TextSections.insert(TLOFMacho.getConstTextCoalSection());
+
+ // Emit the sections in the .s file header to fix the order.
+ for (unsigned i = 0, e = TextSections.size(); i != e; ++i)
+ OutStreamer.SwitchSection(TextSections[i]);
+
if (RelocM == Reloc::DynamicNoPIC) {
const MCSection *sect =
OutContext.getMachOSection("__TEXT", "__symbol_stub4",
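(Aside: the SetVector used in the hunk above is what makes the ordering deterministic — it deduplicates like a set but iterates in insertion order, so the default text section stays first even if SectionForGlobal returns it again. A minimal standalone sketch, with orderSections as an illustrative name:)

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSection.h"

static void orderSections(const llvm::MCSection *Text,
                          const llvm::MCSection *Coal) {
  llvm::SetVector<const llvm::MCSection *,
                  llvm::SmallVector<const llvm::MCSection *, 8>,
                  llvm::SmallPtrSet<const llvm::MCSection *, 8> > Sections;
  Sections.insert(Text);
  Sections.insert(Coal);
  Sections.insert(Text); // Duplicate: ignored, original position kept.
  // Sections.size() == 2; Sections[0] == Text, Sections[1] == Coal.
}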
@@ -743,13 +750,28 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
ARMBuildAttrs::Allowed);
} else if (CPUString == "generic") {
- // FIXME: Why these defaults?
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ // For a generic CPU, we assume a standard v7a architecture in Subtarget.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::Allowed);
- }
+ ARMBuildAttrs::AllowThumb32);
+ } else if (Subtarget->hasV7Ops()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ } else if (Subtarget->hasV6T2Ops())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2);
+ else if (Subtarget->hasV6Ops())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6);
+ else if (Subtarget->hasV5TEOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE);
+ else if (Subtarget->hasV5TOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T);
+ else if (Subtarget->hasV4TOps())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
if (Subtarget->hasNEON() && emitFPU) {
/* NEON is not exactly a VFP architecture, but GAS emit one of
@@ -893,7 +915,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
void ARMAsmPrinter::
EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType());
+ int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
@@ -1030,12 +1052,10 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
OutContext);
// If this isn't a TBB or TBH, the entries are direct branch instructions.
if (OffsetWidth == 4) {
- MCInst BrInst;
- BrInst.setOpcode(ARM::t2B);
- BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
- BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- BrInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(BrInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2B)
+ .addExpr(MBBSymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
continue;
}
// Otherwise it's an offset from the dispatch instruction. Construct an
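(Aside: the hunk above is the first instance of the mechanical rewrite applied through the rest of this file. MCInstBuilder, from the newly included llvm/MC/MCInstBuilder.h, converts implicitly to MCInst, so the multi-statement setup collapses into one fluent expression. A side-by-side sketch, assuming the usual ARM target headers for ARM::t2B and ARMCC::AL:)

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;

static void emitBranch(MCStreamer &OutStreamer, const MCExpr *MBBSymbolExpr) {
  // Before: build the MCInst by hand, one operand at a time.
  MCInst BrInst;
  BrInst.setOpcode(ARM::t2B);
  BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
  BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
  BrInst.addOperand(MCOperand::CreateReg(0));
  OutStreamer.EmitInstruction(BrInst);

  // After: one expression, reading in operand order.
  OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2B)
    .addExpr(MBBSymbolExpr)
    .addImm(ARMCC::AL)
    .addReg(0));
}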
@@ -1079,28 +1099,6 @@ void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps-2, OS);
}
-static void populateADROperands(MCInst &Inst, unsigned Dest,
- const MCSymbol *Label,
- unsigned pred, unsigned ccreg,
- MCContext &Ctx) {
- const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
- Inst.addOperand(MCOperand::CreateReg(Dest));
- Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
- // Add predicate operands.
- Inst.addOperand(MCOperand::CreateImm(pred));
- Inst.addOperand(MCOperand::CreateReg(ccreg));
-}
-
-void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
- unsigned Opcode) {
- MCInst TmpInst;
-
- // Emit the instruction as usual, just patch the opcode.
- LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
- TmpInst.setOpcode(Opcode);
- OutStreamer.EmitInstruction(TmpInst);
-}
-
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
@@ -1277,154 +1275,104 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::tLEApcrel:
case ARM::t2LEApcrel: {
// FIXME: Need to also handle globals and externals
- MCInst TmpInst;
- TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
- : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
- : ARM::ADR));
- populateADROperands(TmpInst, MI->getOperand(0).getReg(),
- GetCPISymbol(MI->getOperand(1).getIndex()),
- MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
- OutContext);
- OutStreamer.EmitInstruction(TmpInst);
+ MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg()));
return;
}
case ARM::LEApcrelJT:
case ARM::tLEApcrelJT:
case ARM::t2LEApcrelJT: {
- MCInst TmpInst;
- TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
- : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
- : ARM::ADR));
- populateADROperands(TmpInst, MI->getOperand(0).getReg(),
- GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
- MI->getOperand(2).getImm()),
- MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
- OutContext);
- OutStreamer.EmitInstruction(TmpInst);
+ MCSymbol *JTIPICSymbol =
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
return;
}
// Darwin call instructions are just normal call instructions with different
// clobber semantics (they clobber R9).
case ARM::BX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::BX);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(MI->getOperand(0).getReg()));
return;
}
case ARM::tBX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tBX);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::BMOVPCRX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
return;
}
case ARM::BMOVPCB_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::Bcc);
- const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
- TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- return;
- }
- case ARM::t2BMOVPCB_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::t2B);
- const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
- TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ .addReg(0));
+
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc)
+ .addExpr(GVSymExpr)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::MOVi16_ga_pcrel:
@@ -1512,15 +1460,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext));
// Form and emit the add.
- MCInst AddInst;
- AddInst.setOpcode(ARM::tADDhirr);
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
- // Add predicate operands.
- AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- AddInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(AddInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDhirr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::PICADD: {
@@ -1535,17 +1481,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext));
// Form and emit the add.
- MCInst AddInst;
- AddInst.setOpcode(ARM::ADDrr);
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- // Add predicate operands.
- AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
- // Add 's' bit operand (always reg0 for this)
- AddInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(AddInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg())
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
return;
}
case ARM::PICSTR:
@@ -1581,16 +1525,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
}
- MCInst LdStInst;
- LdStInst.setOpcode(Opcode);
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- LdStInst.addOperand(MCOperand::CreateImm(0));
- // Add predicate operands.
- LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
- OutStreamer.EmitInstruction(LdStInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
return;
}
@@ -1620,29 +1562,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2BR_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
// Output the data for the jump table itself
EmitJump2Table(MI);
return;
}
case ARM::t2TBB_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::t2TBB);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBB)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
// Output the data for the jump table itself
EmitJump2Table(MI);
// Make sure the next instruction is 2-byte aligned.
@@ -1651,15 +1590,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2TBH_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::t2TBH);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBH)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
// Output the data for the jump table itself
EmitJump2Table(MI);
return;
@@ -1719,17 +1656,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::BR_JTadd: {
// Lower and emit the instruction itself, then the jump table following it.
// add pc, target, idx
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDrr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- // Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
// Output the data for the jump table itself
EmitJumpTable(MI);
@@ -1773,75 +1708,57 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
MCSymbol *Label = GetARMSJLJEHLabel();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp begin");
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tADDi3);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDi3)
+ .addReg(ValReg)
// 's' bit operand
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateImm(7));
+ .addReg(ARM::CPSR)
+ .addReg(ValReg)
+ .addImm(7)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tSTRi);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tSTRi)
+ .addReg(ValReg)
+ .addReg(SrcReg)
// The offset immediate is #4. The operand value is scaled by 4 for the
// tSTR instruction.
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVi8);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tB);
- TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVi8);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tB)
+ .addExpr(SymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
OutStreamer.EmitLabel(Label);
return;
}
@@ -1857,69 +1774,53 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDri);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateImm(8));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
+ .addImm(8)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp begin");
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::STRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::STRi12)
+ .addReg(ValReg)
+ .addReg(SrcReg)
+ .addImm(4)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDri);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ARM::PC)
+ .addReg(ARM::PC)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
return;
}
case ARM::Int_eh_sjlj_longjmp: {
@@ -1929,48 +1830,35 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(8));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::SP)
+ .addReg(SrcReg)
+ .addImm(8)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(4)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::BX);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::tInt_eh_sjlj_longjmp: {
@@ -1981,60 +1869,44 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
// The offset immediate is #8. The operand value is scaled by 4 for the
// tLDR instruction.
- TmpInst.addOperand(MCOperand::CreateImm(2));
+ .addImm(2)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::SP)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tBX);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
}
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 3555e8f..f7392fb 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -53,7 +53,7 @@ public:
Subtarget = &TM.getSubtarget<ARMSubtarget>();
}
- virtual const char *getPassName() const {
+ virtual const char *getPassName() const LLVM_OVERRIDE {
return "ARM Assembly Printer";
}
@@ -62,22 +62,24 @@ public:
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) LLVM_OVERRIDE;
virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O);
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) LLVM_OVERRIDE;
void EmitJumpTable(const MachineInstr *MI);
void EmitJump2Table(const MachineInstr *MI);
- virtual void EmitInstruction(const MachineInstr *MI);
- bool runOnMachineFunction(MachineFunction &F);
+ virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE;
+ virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE;
- virtual void EmitConstantPool() {} // we emit constant pools customly!
- virtual void EmitFunctionBodyEnd();
- virtual void EmitFunctionEntryLabel();
- void EmitStartOfAsmFile(Module &M);
- void EmitEndOfAsmFile(Module &M);
- void EmitXXStructor(const Constant *CV);
+ virtual void EmitConstantPool() LLVM_OVERRIDE {
+ // We emit constant pools ourselves!
+ }
+ virtual void EmitFunctionBodyEnd() LLVM_OVERRIDE;
+ virtual void EmitFunctionEntryLabel() LLVM_OVERRIDE;
+ virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE;
+ virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE;
+ virtual void EmitXXStructor(const Constant *CV) LLVM_OVERRIDE;
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
@@ -101,12 +103,13 @@ private:
public:
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ virtual MachineLocation
+ getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE;
/// EmitDwarfRegOp - Emit dwarf register operation.
- virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+ virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE;
- virtual unsigned getISAEncoding() {
+ virtual unsigned getISAEncoding() LLVM_OVERRIDE {
// ARM/Darwin adds ISA to the DWARF info for each function.
if (!Subtarget->isTargetDarwin())
return 0;
@@ -114,18 +117,19 @@ public:
ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
+private:
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
- MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
- const MachineBasicBlock *MBB) const;
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
- MCSymbol *GetARMSJLJEHLabel(void) const;
+ MCSymbol *GetARMSJLJEHLabel() const;
MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+public:
/// EmitMachineConstantPoolValue - Print a machine constantpool value to
/// the .s file.
- virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+ virtual void
+ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) LLVM_OVERRIDE;
};
} // end namespace llvm
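(Aside: LLVM_OVERRIDE, added throughout this header, comes from llvm/Support/Compiler.h. A reconstructed sketch of its definition follows — the real feature test may differ — it degrades to nothing on pre-C++11 compilers, so the annotations cost nothing there:)

#ifndef __has_feature
#define __has_feature(x) 0       // Fallback for compilers without __has_feature.
#endif
#if __has_feature(cxx_override_control)
#define LLVM_OVERRIDE override   // Compiler verifies the method really overrides.
#else
#define LLVM_OVERRIDE            // No-op on older compilers.
#endif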
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e2f0d7d..0076910 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -18,9 +18,7 @@
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,12 +27,14 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "ARMGenInstrInfo.inc"
@@ -49,6 +49,11 @@ static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
cl::desc("Widen ARM vmovs to vmovd when possible"));
+static cl::opt<unsigned>
+SwiftPartialUpdateClearance("swift-partial-update-clearance",
+ cl::Hidden, cl::init(12),
+ cl::desc("Clearance before partial register updates"));
+
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
uint16_t MLxOpc; // MLA / MLS opcode
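(Aside: a cl::opt<unsigned> such as SwiftPartialUpdateClearance behaves like a plain unsigned wherever it is read, and surfaces as a hidden -swift-partial-update-clearance=N flag. A minimal sketch of the pattern; the flag name "example-clearance" and the needsBreak helper are illustrations only:)

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<unsigned>
Clearance("example-clearance", cl::Hidden, cl::init(12),
          cl::desc("Illustrative copy of the clearance knob above"));

static bool needsBreak(unsigned DistanceInInstrs) {
  // The option converts implicitly to its value type.
  return DistanceInInstrs < Clearance;
}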
@@ -101,7 +106,7 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
const InstrItineraryData *II = TM->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
- return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}
ScheduleHazardRecognizer *ARMBaseInstrInfo::
@@ -110,7 +115,7 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
if (Subtarget.isThumb2() || Subtarget.hasVFP2())
return (ScheduleHazardRecognizer *)
new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
- return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+ return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
MachineInstr *
@@ -459,8 +464,9 @@ PredicateInstruction(MachineInstr *MI,
unsigned Opc = MI->getOpcode();
if (isUncondBranchOpcode(Opc)) {
MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
- MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
- MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
return true;
}
@@ -697,6 +703,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
+ else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;
else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
@@ -786,6 +794,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -933,6 +948,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
@@ -958,6 +974,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
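(Aside: AddDReg, called in both GPRPair hunks above, is a small static helper that already exists earlier in ARMBaseInstrInfo.cpp. Its shape is roughly the following — reconstructed, details may differ. It appends one half of the pair to the builder, resolving gsub_0/gsub_1 to a concrete register when Reg is physical:)

static const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
                                          unsigned Reg, unsigned SubIdx,
                                          unsigned State,
                                          const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);        // No sub-index: use Reg as-is.
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);  // Virtual reg: keep the sub-index.
}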
@@ -1130,6 +1155,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// All clear, widen the COPY.
DEBUG(dbgs() << "widening: " << *MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
// Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
// or some other super-register.
@@ -1141,14 +1167,14 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
MI->setDesc(get(ARM::VMOVD));
MI->getOperand(0).setReg(DstRegD);
MI->getOperand(1).setReg(SrcRegD);
- AddDefaultPred(MachineInstrBuilder(MI));
+ AddDefaultPred(MIB);
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
// are reading an undefined value from SrcRegD, but a proper value from
// SrcRegS.
MI->getOperand(1).setIsUndef();
- MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+ MIB.addReg(SrcRegS, RegState::Implicit);
// SrcRegD may actually contain an unrelated value in the ssub_1
// sub-register. Don't kill it. Only kill the ssub_0 sub-register.
@@ -1245,7 +1271,7 @@ reMaterialize(MachineBasicBlock &MBB,
MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
- MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+ MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
switch(Orig->getOpcode()) {
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
@@ -1349,6 +1375,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
int64_t &Offset2) const {
@@ -1389,7 +1418,6 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
- case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRSHi12:
@@ -1424,6 +1452,9 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const {
@@ -1528,6 +1559,14 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
}
+bool
+ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ // Reduce false anti-dependencies to let Swift's out-of-order execution
+ // engine do its thing.
+ return Subtarget.isSwift();
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -1567,7 +1606,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return NULL;
- MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
if (!MI)
return NULL;
// After swapping the MOVCC operands, also invert the condition.
@@ -1576,7 +1615,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return MI;
}
}
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
@@ -1679,7 +1718,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
// same register as operand 0.
MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
FalseReg.setImplicit();
- NewMI->addOperand(FalseReg);
+ NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// The caller will erase MI, but not DefMI.
@@ -2342,6 +2381,260 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
return true;
}
+static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: {
+ const MCInstrDesc &Desc = MI->getDesc();
+ int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
+ assert(UOps >= 0 && "bad # UOps");
+ return UOps;
+ }
+
+ case ARM::LDRrs:
+ case ARM::LDRBrs:
+ case ARM::STRrs:
+ case ARM::STRBrs: {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRH:
+ case ARM::STRH: {
+ if (!MI->getOperand(2).getReg())
+ return 1;
+
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+    return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
+
+ case ARM::LDRSB_POST:
+ case ARM::LDRSH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 4 : 3;
+ }
+
+ case ARM::LDR_PRE_REG:
+ case ARM::LDRB_PRE_REG: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rt == Rm)
+ return 3;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::STR_PRE_REG:
+ case ARM::STRB_PRE_REG: {
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::LDRH_PRE:
+ case ARM::STRH_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (!Rm)
+ return 2;
+ if (Rt == Rm)
+ return 3;
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
+ ? 3 : 2;
+ }
+
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 3 : 2;
+ }
+
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDRB_POST_IMM:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRH_POST:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STR_PRE_IMM:
+ return 2;
+
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSH_PRE: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm == 0)
+ return 3;
+ unsigned Rt = MI->getOperand(0).getReg();
+ if (Rt == Rm)
+ return 4;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 3;
+ return 4;
+ }
+
+ case ARM::LDRD: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ? 4 : 3;
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::STRD: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ? 4 : 3;
+ return 2;
+ }
+
+ case ARM::LDRD_POST:
+ case ARM::t2LDRD_POST:
+ return 3;
+
+ case ARM::STRD_POST:
+ case ARM::t2STRD_POST:
+ return 4;
+
+ case ARM::LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ? 5 : 4;
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::t2LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::STRD_PRE: {
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ? 5 : 4;
+ return 3;
+ }
+
+ case ARM::t2STRD_PRE:
+ return 3;
+
+ case ARM::t2LDR_POST:
+ case ARM::t2LDRB_POST:
+ case ARM::t2LDRB_PRE:
+ case ARM::t2LDRSBi12:
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBpci:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRH_POST:
+ case ARM::t2LDRH_PRE:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSB_POST:
+ case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSH_POST:
+ case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHpci:
+ case ARM::t2LDRSHs:
+ return 2;
+
+ case ARM::t2LDRDi8: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::t2STRB_POST:
+ case ARM::t2STRB_PRE:
+ case ARM::t2STRBs:
+ case ARM::t2STRDi8:
+ case ARM::t2STRH_POST:
+ case ARM::t2STRH_PRE:
+ case ARM::t2STRHs:
+ case ARM::t2STR_POST:
+ case ARM::t2STR_PRE:
+ case ARM::t2STRs:
+ return 2;
+ }
+}
+
+// Return the number of 32-bit words loaded by LDM or stored by STM. If this
+// can't be easily determined, return 0 (missing MachineMemOperand).
+//
+// FIXME: The current MachineInstr design does not support relying on machine
+// mem operands to determine the width of a memory access. Instead, we expect
+// the target to provide this information based on the instruction opcode and
+// operands. However, using MachineMemOperand is currently the best solution,
+// for two reasons:
+//
+// 1) getNumMicroOps tries to infer LDM memory width from the total number of
+// MI operands. This is much more dangerous than using the MachineMemOperand
+// sizes because CodeGen passes can insert/remove optional machine operands.
+// In fact, it's totally incorrect for preRA passes and appears to be wrong
+// for postRA passes as well.
+//
+// 2) getNumLDMAddresses is only used by the scheduling machine model, and any
+// machine model that calls this should handle the unknown (zero size) case.
+//
+// Long term, we should require a target hook that verifies MachineMemOperand
+// sizes during MC lowering. That target hook should be local to MC lowering
+// because we can't ensure that it is aware of other MI forms. Doing this will
+// ensure that MachineMemOperands are correctly propagated through all passes.
+unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ Size += (*I)->getSize();
+ }
+ return Size / 4;
+}
+
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr *MI) const {
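The !isSub-plus-lsl-#0..3 test above recurs in nearly every addressing-mode case, and again in the Swift latency adjustments later in this patch. A hedged refactoring sketch (the helper name is invented here, not part of the patch):

    // True for the AM2 shifter operands Swift's load/store pipeline treats as
    // cheap: an added (not subtracted) register index with either no shift or
    // lsl #1-#3.
    static bool isSwiftFastImmShift(unsigned ShOpVal) {
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      return ARM_AM::getAM2Op(ShOpVal) == ARM_AM::add &&
             (ShImm == 0 ||
              ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
               ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl));
    }

Each repeated ladder could then collapse to a single line such as return isSwiftFastImmShift(ShOpVal) ? 1 : 2;.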
@@ -2351,8 +2644,12 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
int ItinUOps = ItinData->getNumMicroOps(Class);
- if (ItinUOps >= 0)
+ if (ItinUOps >= 0) {
+ if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
+ return getNumMicroOpsSwiftLdSt(ItinData, MI);
+
return ItinUOps;
+ }
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2421,7 +2718,43 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
- if (Subtarget.isCortexA8()) {
+ if (Subtarget.isSwift()) {
+ // rdar://8402126
+      int UOps = 1 + NumRegs; // One for address computation, one per ld / st.
+ switch (Opc) {
+ default: break;
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ ++UOps; // One for base register writeback.
+ break;
+ case ARM::LDMIA_RET:
+ case ARM::tPOP_RET:
+ case ARM::t2LDMIA_RET:
+ UOps += 2; // One for base reg wb, one for write to pc.
+ break;
+ }
+ return UOps;
+ } else if (Subtarget.isCortexA8()) {
if (NumRegs < 4)
return 2;
// 4 registers would be issued: 2, 2.
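A worked example of the Swift counting above (derived from the code, not stated in the patch): an LDMIA_UPD that restores four registers is modeled as 1 microop for the address computation, 4 for the loads, and 1 for the base register writeback, 6 in total; a tPOP_RET of the same four registers costs 1 + 4 + 2 = 7, the final two covering the writeback and the write to pc.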
@@ -2430,7 +2763,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
if (NumRegs % 2)
++A8UOps;
return A8UOps;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
int A9UOps = (NumRegs / 2);
     // If there is an odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2463,7 +2796,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
DefCycle = RegNo / 2 + 1;
if (RegNo % 2)
++DefCycle;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = RegNo;
bool isSLoad = false;
@@ -2507,7 +2840,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
DefCycle = 1;
// Result latency is issue cycle + 2: E2.
DefCycle += 2;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = (RegNo / 2);
     // If there is an odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2538,7 +2871,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
UseCycle = RegNo / 2 + 1;
if (RegNo % 2)
++UseCycle;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = RegNo;
bool isSStore = false;
@@ -2579,7 +2912,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
UseCycle = 2;
// Read in E3.
UseCycle += 2;
- } else if (Subtarget.isCortexA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = (RegNo / 2);
     // If there is an odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2764,7 +3097,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
const MachineInstr *DefMI,
const MCInstrDesc *DefMCID, unsigned DefAlign) {
int Adjust = 0;
- if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) {
+ if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
switch (DefMCID->getOpcode()) {
@@ -2789,9 +3122,40 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
break;
}
}
+ } else if (Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ Adjust -= 2;
+ else if (!isSub &&
+ ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Adjust;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
+ Adjust -= 2;
+ break;
+ }
+ }
}
- if (DefAlign < 8 && Subtarget.isCortexA9()) {
+ if (DefAlign < 8 && Subtarget.isLikeA9()) {
switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
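To make the Swift adjustment concrete (an illustrative reading of the code, with an assumed base latency): if the itinerary assigns LDRrs a 4-cycle def latency, then ldr r0, [r1, r2] and ldr r0, [r1, r2, lsl #2] are modeled at 2 cycles, ldr r0, [r1, r2, lsr #1] at 3, and a subtracted index such as ldr r0, [r1, -r2] keeps the full 4.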
@@ -2949,7 +3313,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
- return Subtarget.isCortexA9() ? 1 : 20;
+ return Subtarget.isLikeA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
@@ -2965,7 +3329,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
--Latency;
}
return Latency;
@@ -3015,7 +3381,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (!UseNode->isMachineOpcode()) {
int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
- if (Subtarget.isCortexA9())
+ if (Subtarget.isLikeA9() || Subtarget.isSwift())
return Latency <= 2 ? 1 : Latency - 1;
else
return Latency <= 3 ? 1 : Latency - 2;
@@ -3032,7 +3398,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
UseMCID, UseIdx, UseAlign);
if (Latency > 1 &&
- (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
switch (DefMCID.getOpcode()) {
@@ -3059,9 +3425,36 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
break;
}
}
+ } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ Latency -= 2;
+ else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl 0-3 only.
+ Latency -= 2;
+ break;
+ }
+ }
}
- if (DefAlign < 8 && Subtarget.isCortexA9())
+ if (DefAlign < 8 && Subtarget.isLikeA9())
switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLD1q8:
@@ -3185,18 +3578,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
-unsigned
-ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *DepMI) const {
- unsigned Reg = DefMI->getOperand(DefIdx).getReg();
- if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
- return 1;
-
- // If the second MI is predicated, then there is an implicit use dependency.
- return getInstrLatency(ItinData, DefMI);
-}
-
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -3354,9 +3735,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
- // Cortex-A9 is particularly picky about mixing the two and wants these
+ // A9-like cores are particularly picky about mixing the two and want these
// converted.
- if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ if (Subtarget.isLikeA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
MI->getOpcode() == ARM::VMOVSR ||
MI->getOpcode() == ARM::VMOVS))
@@ -3394,12 +3775,54 @@ static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
return DReg;
}
+/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, set
+/// ImplicitSReg to the register number that must be marked as implicit-use,
+/// or to zero if no register needs to be marked.
+///
+/// If the function cannot determine whether an SPR should be marked as an
+/// implicit use, it returns false.
+///
+/// This function handles cases where an instruction is being modified from
+/// taking an SPR to taking a DPR[Lane]. A use of the DPR is being added,
+/// which may conflict with an earlier def of an SPR corresponding to
+/// DPR[Lane^1] (i.e. the other lane of the DPR).
+///
+/// If the other SPR lane is defined, an implicit-use of it should be added.
+/// Otherwise (including the case where the DPR itself is defined), it
+/// should not.
+///
+static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
+ MachineInstr *MI,
+ unsigned DReg, unsigned Lane,
+ unsigned &ImplicitSReg) {
+ // If the DPR is defined or used already, the other SPR lane will be chained
+ // correctly, so there is nothing to be done.
+ if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
+ ImplicitSReg = 0;
+ return true;
+ }
+
+ // Otherwise we need to go searching to see if the SPR is set explicitly.
+ ImplicitSReg = TRI->getSubReg(DReg,
+ (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
+ MachineBasicBlock::LivenessQueryResult LQR =
+ MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
+
+ if (LQR == MachineBasicBlock::LQR_Live)
+ return true;
+ else if (LQR == MachineBasicBlock::LQR_Unknown)
+ return false;
+
+ // If the register is known not to be live, there is no need to add an
+ // implicit-use.
+ ImplicitSReg = 0;
+ return true;
+}
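Concretely (an assumed scenario for illustration): when a VMOVSR that writes s1 is rewritten below as a NEON lane insert into d0, the rewritten instruction reads d0. If s0, the other lane, was defined earlier and is still live, the new instruction must carry an implicit use of s0 so its read of d0 stays ordered after that def; if s0 is known dead, no extra operand is needed; and if liveness cannot be determined, the routine returns false and the caller leaves the instruction in the VFP domain.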
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
- MachineInstrBuilder MIB(MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
switch (MI->getOpcode()) {
default:
@@ -3451,7 +3874,7 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// was dead before here.
MIB.addReg(SrcReg, RegState::Implicit);
break;
- case ARM::VMOVSR:
+ case ARM::VMOVSR: {
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
@@ -3462,12 +3885,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
- // If we insert both a novel <def> and an <undef> on the DReg, we break
- // any existing dependency chain on the unused lane. Either already being
- // present means this instruction is in that chain anyway so we can make
- // the transformation.
- if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI))
- break;
+ unsigned ImplicitSReg;
+ if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
+ break;
for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
MI->RemoveOperand(i-1);
@@ -3484,7 +3904,10 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// The narrower destination must be marked as set to keep previous chains
// in place.
MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
break;
+ }
case ARM::VMOVS: {
if (Domain != ExeNEON)
break;
@@ -3497,12 +3920,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
- // If we insert both a novel <def> and an <undef> on the DReg, we break
- // any existing dependency chain on the unused lane. Either already being
- // present means this instruction is in that chain anyway so we can make
- // the transformation.
- if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI))
- break;
+ unsigned ImplicitSReg;
+ if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
+ break;
for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
MI->RemoveOperand(i-1);
@@ -3520,6 +3940,8 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// more, so add them in manually.
MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
MIB.addReg(SrcReg, RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
break;
}
@@ -3578,12 +4000,130 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// As before, the original destination is no longer represented, add it
// implicitly.
MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
break;
}
}
}
+//===----------------------------------------------------------------------===//
+// Partial register updates
+//===----------------------------------------------------------------------===//
+//
+// Swift renames NEON registers with 64-bit granularity. That means any
+// instruction writing an S-reg implicitly reads the containing D-reg. The
+// problem is mostly avoided by translating f32 operations to v2f32 operations
+// on D-registers, but f32 loads are still a problem.
+//
+// These instructions can load an f32 into a NEON register:
+//
+// VLDRS - Only writes S, partial D update.
+// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
+// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
+//
+// FCONSTD can be used as a dependency-breaking instruction.
+
+
+unsigned ARMBaseInstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ // Only Swift has partial register update problems.
+ if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+ return 0;
+
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.readsReg())
+ return 0;
+ unsigned Reg = MO.getReg();
+ int UseOp = -1;
+
+ switch(MI->getOpcode()) {
+ // Normal instructions writing only an S-register.
+ case ARM::VLDRS:
+ case ARM::FCONSTS:
+ case ARM::VMOVSR:
+ // rdar://problem/8791586
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv2f32:
+ case ARM::VMOVv1i64:
+ UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
+ break;
+
+ // Explicitly reads the dependency.
+ case ARM::VLD1LNd32:
+ UseOp = 1;
+ break;
+ default:
+ return 0;
+ }
+
+ // If this instruction actually reads a value from Reg, there is no unwanted
+ // dependency.
+ if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
+ return 0;
+
+ // We must be able to clobber the whole D-reg.
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Virtual register must be a foo:ssub_0<def,undef> operand.
+ if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ // Physical register: MI must define the full D-reg.
+ unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DReg || !MI->definesRegister(DReg, TRI))
+ return 0;
+ }
+
+ // MI has an unwanted D-register dependency.
+  // Avoid defs in the previous N instructions.
+ return SwiftPartialUpdateClearance;
+}
+
+// Break a partial register dependency after getPartialRegUpdateClearance
+// returned non-zero.
+void ARMBaseInstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Can't break virtual register dependencies.");
+ unsigned DReg = Reg;
+
+ // If MI defines an S-reg, find the corresponding D super-register.
+ if (ARM::SPRRegClass.contains(Reg)) {
+ DReg = ARM::D0 + (Reg - ARM::S0) / 2;
+ assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
+ }
+
+ assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
+ assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
+
+  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32, which defines
+  // the full D-register by loading the same value to both lanes. The
+  // instruction is micro-coded with 2 uops, so don't do this until we can
+  // properly schedule micro-coded instructions. The dispatcher stalls cause
+  // regressions that are too large.
+
+ // Insert the dependency-breaking FCONSTD before MI.
+ // 96 is the encoding of 0.5, but the actual value doesn't matter here.
+ AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::FCONSTD), DReg).addImm(96));
+ MI->addRegisterKilled(DReg, TRI, true);
+}
+
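How the two hooks above fit together, from the caller's side (a hedged sketch: the helper below is invented, modeled on how an ExecutionDepsFix-style pass drives similar hooks; DefDistance stands in for the pass's bookkeeping of how recently the D-reg was written):

    // Ask the target how much clearance the partial def wants, and only pay
    // for the dependency-breaking FCONSTD when a def is closer than that.
    static void maybeBreakPartialDep(const ARMBaseInstrInfo &TII,
                                     MachineInstr *MI, unsigned OpNum,
                                     unsigned DefDistance,
                                     const TargetRegisterInfo *TRI) {
      unsigned Clearance = TII.getPartialRegUpdateClearance(MI, OpNum, TRI);
      if (Clearance && DefDistance < Clearance)
        TII.breakPartialRegDependency(MI, OpNum, TRI);
    }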
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 92e5ee8..2698132 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -15,10 +15,10 @@
#define ARMBASEINSTRUCTIONINFO_H
#include "ARM.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "ARMGenInstrInfo.inc"
@@ -182,10 +182,13 @@ public:
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles,
const BranchProbability
- &Probability) const {
+ &Probability) const {
return NumCycles == 1;
}
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -226,15 +229,18 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
- virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *DepMI) const;
-
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+ unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned,
+ const TargetRegisterInfo*) const;
+ void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
+ const TargetRegisterInfo *TRI) const;
+  /// Get the number of addresses loaded by an LDM or VLDM, or zero if unknown.
+ unsigned getNumLDMAddresses(const MachineInstr *MI) const;
+
private:
unsigned getInstBundleLength(const MachineInstr *MI) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 9deb96e..d2f6a33 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -18,44 +18,34 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
#define GET_REGINFO_TARGET_DESC
#include "ARMGenRegisterInfo.inc"
using namespace llvm;
-static cl::opt<bool>
-ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
- cl::desc("Force use of virtual base registers for stack load/store"));
-static cl::opt<bool>
-EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
- cl::desc("Enable pre-regalloc stack frame index allocation"));
-static cl::opt<bool>
-EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
- cl::desc("Enable use of a base pointer for complex stack frames"));
-
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti),
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti),
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
BasePtr(ARM::R6) {
}
@@ -84,6 +74,11 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
+const uint32_t*
+ARMBaseRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -106,148 +101,12 @@ getReservedRegs(const MachineFunction &MF) const {
for (unsigned i = 0; i != 16; ++i)
Reserved.set(ARM::D16 + i);
}
- return Reserved;
-}
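+  // Reserve a GPRPair register whenever either of its subregisters is
+  // reserved, so the allocator never returns a pair with an unusable half.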
+ const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I)
+ for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
+ if (Reserved.test(*SI)) Reserved.set(*I);
-bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
- unsigned Reg) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- switch (Reg) {
- default: break;
- case ARM::SP:
- case ARM::PC:
- return true;
- case ARM::R6:
- if (hasBasePointer(MF))
- return true;
- break;
- case ARM::R7:
- case ARM::R11:
- if (FramePtr == Reg && TFI->hasFP(MF))
- return true;
- break;
- case ARM::R9:
- return STI.isR9Reserved();
- }
-
- return false;
-}
-
-bool
-ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
- SmallVectorImpl<unsigned> &SubIndices,
- unsigned &NewSubIdx) const {
-
- unsigned Size = RC->getSize() * 8;
- if (Size < 6)
- return 0;
-
- NewSubIdx = 0; // Whole register.
- unsigned NumRegs = SubIndices.size();
- if (NumRegs == 8) {
- // 8 D registers -> 1 QQQQ register.
- return (Size == 512 &&
- SubIndices[0] == ARM::dsub_0 &&
- SubIndices[1] == ARM::dsub_1 &&
- SubIndices[2] == ARM::dsub_2 &&
- SubIndices[3] == ARM::dsub_3 &&
- SubIndices[4] == ARM::dsub_4 &&
- SubIndices[5] == ARM::dsub_5 &&
- SubIndices[6] == ARM::dsub_6 &&
- SubIndices[7] == ARM::dsub_7);
- } else if (NumRegs == 4) {
- if (SubIndices[0] == ARM::qsub_0) {
- // 4 Q registers -> 1 QQQQ register.
- return (Size == 512 &&
- SubIndices[1] == ARM::qsub_1 &&
- SubIndices[2] == ARM::qsub_2 &&
- SubIndices[3] == ARM::qsub_3);
- } else if (SubIndices[0] == ARM::dsub_0) {
- // 4 D registers -> 1 QQ register.
- if (Size >= 256 &&
- SubIndices[1] == ARM::dsub_1 &&
- SubIndices[2] == ARM::dsub_2 &&
- SubIndices[3] == ARM::dsub_3) {
- if (Size == 512)
- NewSubIdx = ARM::qqsub_0;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_4) {
- // 4 D registers -> 1 QQ register (2nd).
- if (Size == 512 &&
- SubIndices[1] == ARM::dsub_5 &&
- SubIndices[2] == ARM::dsub_6 &&
- SubIndices[3] == ARM::dsub_7) {
- NewSubIdx = ARM::qqsub_1;
- return true;
- }
- } else if (SubIndices[0] == ARM::ssub_0) {
- // 4 S registers -> 1 Q register.
- if (Size >= 128 &&
- SubIndices[1] == ARM::ssub_1 &&
- SubIndices[2] == ARM::ssub_2 &&
- SubIndices[3] == ARM::ssub_3) {
- if (Size >= 256)
- NewSubIdx = ARM::qsub_0;
- return true;
- }
- }
- } else if (NumRegs == 2) {
- if (SubIndices[0] == ARM::qsub_0) {
- // 2 Q registers -> 1 QQ register.
- if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
- if (Size == 512)
- NewSubIdx = ARM::qqsub_0;
- return true;
- }
- } else if (SubIndices[0] == ARM::qsub_2) {
- // 2 Q registers -> 1 QQ register (2nd).
- if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
- NewSubIdx = ARM::qqsub_1;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_0) {
- // 2 D registers -> 1 Q register.
- if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
- if (Size >= 256)
- NewSubIdx = ARM::qsub_0;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_2) {
- // 2 D registers -> 1 Q register (2nd).
- if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
- NewSubIdx = ARM::qsub_1;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_4) {
- // 2 D registers -> 1 Q register (3rd).
- if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
- NewSubIdx = ARM::qsub_2;
- return true;
- }
- } else if (SubIndices[0] == ARM::dsub_6) {
- // 2 D registers -> 1 Q register (3rd).
- if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
- NewSubIdx = ARM::qsub_3;
- return true;
- }
- } else if (SubIndices[0] == ARM::ssub_0) {
- // 2 S registers -> 1 D register.
- if (SubIndices[1] == ARM::ssub_1) {
- if (Size >= 128)
- NewSubIdx = ARM::dsub_0;
- return true;
- }
- } else if (SubIndices[0] == ARM::ssub_2) {
- // 2 S registers -> 1 D register (2nd).
- if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
- NewSubIdx = ARM::dsub_1;
- return true;
- }
- }
- }
- return false;
+ return Reserved;
}
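For example (inferred from the register definitions rather than stated in the patch): SP is always reserved, so the loop above also reserves the R12_SP pair, and when R7 is the frame pointer the R6_R7 pair drops out as well, keeping the allocator from handing out a pair it could never use whole.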
const TargetRegisterClass*
@@ -263,6 +122,7 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
case ARM::QPRRegClassID:
case ARM::QQPRRegClassID:
case ARM::QQQQPRRegClassID:
+ case ARM::GPRPairRegClassID:
return Super;
}
Super = *I++;
@@ -303,154 +163,62 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-/// getRawAllocationOrder - Returns the register allocation order for a
-/// specified register class with a target-dependent hint.
-ArrayRef<uint16_t>
-ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- // Alternative register allocation orders when favoring even / odd registers
- // of register pairs.
-
- // No FP, R9 is available.
- static const uint16_t GPREven1[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
- ARM::R9, ARM::R11
- };
- static const uint16_t GPROdd1[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
- ARM::R8, ARM::R10
- };
-
- // FP is R7, R9 is available.
- static const uint16_t GPREven2[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
- ARM::R9, ARM::R11
- };
- static const uint16_t GPROdd2[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
- ARM::R8, ARM::R10
- };
-
- // FP is R11, R9 is available.
- static const uint16_t GPREven3[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
- ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
- ARM::R9
- };
- static const uint16_t GPROdd3[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
- ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
- ARM::R8
- };
-
- // No FP, R9 is not available.
- static const uint16_t GPREven4[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
- ARM::R11
- };
- static const uint16_t GPROdd4[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
- ARM::R10
- };
-
- // FP is R7, R9 is not available.
- static const uint16_t GPREven5[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
- ARM::R11
- };
- static const uint16_t GPROdd5[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
- ARM::R10
- };
-
- // FP is R11, R9 is not available.
- static const uint16_t GPREven6[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6,
- ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
- };
- static const uint16_t GPROdd6[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7,
- ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
- };
-
- // We only support even/odd hints for GPR and rGPR.
- if (RC != &ARM::GPRRegClass && RC != &ARM::rGPRRegClass)
- return RC->getRawAllocationOrder(MF);
-
- if (HintType == ARMRI::RegPairEven) {
- if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
- // It's no longer possible to fulfill this hint. Return the default
- // allocation order.
- return RC->getRawAllocationOrder(MF);
-
- if (!TFI->hasFP(MF)) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPREven1);
- else
- return makeArrayRef(GPREven4);
- } else if (FramePtr == ARM::R7) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPREven2);
- else
- return makeArrayRef(GPREven5);
- } else { // FramePtr == ARM::R11
- if (!STI.isR9Reserved())
- return makeArrayRef(GPREven3);
- else
- return makeArrayRef(GPREven6);
- }
- } else if (HintType == ARMRI::RegPairOdd) {
- if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
- // It's no longer possible to fulfill this hint. Return the default
- // allocation order.
- return RC->getRawAllocationOrder(MF);
-
- if (!TFI->hasFP(MF)) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPROdd1);
- else
- return makeArrayRef(GPROdd4);
- } else if (FramePtr == ARM::R7) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPROdd2);
- else
- return makeArrayRef(GPROdd5);
- } else { // FramePtr == ARM::R11
- if (!STI.isR9Reserved())
- return makeArrayRef(GPROdd3);
- else
- return makeArrayRef(GPROdd6);
- }
- }
- return RC->getRawAllocationOrder(MF);
+// Get the other register in a GPRPair.
+static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) {
+ for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers)
+ if (ARM::GPRPairRegClass.contains(*Supers))
+ return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0);
+ return 0;
}
-/// ResolveRegAllocHint - Resolves the specified register allocation hint
-/// to a physical register. Returns the physical register if it is successful.
-unsigned
-ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
- const MachineFunction &MF) const {
- if (Reg == 0 || !isPhysicalRegister(Reg))
- return 0;
- if (Type == 0)
- return Reg;
- else if (Type == (unsigned)ARMRI::RegPairOdd)
- // Odd register.
- return getRegisterPairOdd(Reg, MF);
- else if (Type == (unsigned)ARMRI::RegPairEven)
- // Even register.
- return getRegisterPairEven(Reg, MF);
- return 0;
+// Resolve the RegPairEven / RegPairOdd register allocator hints.
+void
+ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ unsigned Odd;
+ switch (Hint.first) {
+ case ARMRI::RegPairEven:
+ Odd = 0;
+ break;
+ case ARMRI::RegPairOdd:
+ Odd = 1;
+ break;
+ default:
+ TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
+ return;
+ }
+
+ // This register should preferably be even (Odd == 0) or odd (Odd == 1).
+ // Check if the other part of the pair has already been assigned, and provide
+ // the paired register as the first hint.
+ unsigned PairedPhys = 0;
+ if (VRM && VRM->hasPhys(Hint.second)) {
+ PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this);
+ if (PairedPhys && MRI.isReserved(PairedPhys))
+ PairedPhys = 0;
+ }
+
+ // First prefer the paired physreg.
+ if (PairedPhys)
+ Hints.push_back(PairedPhys);
+
+ // Then prefer even or odd registers.
+ for (unsigned I = 0, E = Order.size(); I != E; ++I) {
+ unsigned Reg = Order[I];
+ if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
+ continue;
+ // Don't provide hints that are paired to a reserved register.
+ unsigned Paired = getPairedGPR(Reg, !Odd, this);
+ if (!Paired || MRI.isReserved(Paired))
+ continue;
+ Hints.push_back(Reg);
+ }
}
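A worked example (hypothetical allocation state): suppose VirtReg carries the RegPairOdd hint and its partner has already been assigned R4. getPairedGPR(R4, /*Odd=*/true, this) walks R4's super-registers, finds the R4_R5 pair, and returns its gsub_1 half, R5, which is pushed as the first hint; the loop then appends the remaining odd-numbered candidates from Order whose even partner is not reserved.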
void
@@ -476,7 +244,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
bool
ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
// CortexA9 has a Write-after-write hazard for NEON registers.
- if (!STI.isCortexA9())
+ if (!STI.isLikeA9())
return false;
switch (RC->getID()) {
@@ -501,9 +269,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (!EnableBasePointer)
- return false;
-
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
// emergency spill slot.
@@ -549,8 +314,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// pointer adjustments around calls.
if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
return true;
- if (!EnableBasePointer)
- return false;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
return MRI->canReserveReg(BasePtr);
@@ -561,8 +324,10 @@ needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
@@ -593,112 +358,6 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
}
-unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
- const MachineFunction &MF) const {
- switch (Reg) {
- default: break;
- // Return 0 if either register of the pair is a special register.
- // So no R12, etc.
- case ARM::R1: return ARM::R0;
- case ARM::R3: return ARM::R2;
- case ARM::R5: return ARM::R4;
- case ARM::R7:
- return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
- ? 0 : ARM::R6;
- case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
- case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
-
- case ARM::S1: return ARM::S0;
- case ARM::S3: return ARM::S2;
- case ARM::S5: return ARM::S4;
- case ARM::S7: return ARM::S6;
- case ARM::S9: return ARM::S8;
- case ARM::S11: return ARM::S10;
- case ARM::S13: return ARM::S12;
- case ARM::S15: return ARM::S14;
- case ARM::S17: return ARM::S16;
- case ARM::S19: return ARM::S18;
- case ARM::S21: return ARM::S20;
- case ARM::S23: return ARM::S22;
- case ARM::S25: return ARM::S24;
- case ARM::S27: return ARM::S26;
- case ARM::S29: return ARM::S28;
- case ARM::S31: return ARM::S30;
-
- case ARM::D1: return ARM::D0;
- case ARM::D3: return ARM::D2;
- case ARM::D5: return ARM::D4;
- case ARM::D7: return ARM::D6;
- case ARM::D9: return ARM::D8;
- case ARM::D11: return ARM::D10;
- case ARM::D13: return ARM::D12;
- case ARM::D15: return ARM::D14;
- case ARM::D17: return ARM::D16;
- case ARM::D19: return ARM::D18;
- case ARM::D21: return ARM::D20;
- case ARM::D23: return ARM::D22;
- case ARM::D25: return ARM::D24;
- case ARM::D27: return ARM::D26;
- case ARM::D29: return ARM::D28;
- case ARM::D31: return ARM::D30;
- }
-
- return 0;
-}
-
-unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
- const MachineFunction &MF) const {
- switch (Reg) {
- default: break;
- // Return 0 if either register of the pair is a special register.
- // So no R12, etc.
- case ARM::R0: return ARM::R1;
- case ARM::R2: return ARM::R3;
- case ARM::R4: return ARM::R5;
- case ARM::R6:
- return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
- ? 0 : ARM::R7;
- case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
- case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
-
- case ARM::S0: return ARM::S1;
- case ARM::S2: return ARM::S3;
- case ARM::S4: return ARM::S5;
- case ARM::S6: return ARM::S7;
- case ARM::S8: return ARM::S9;
- case ARM::S10: return ARM::S11;
- case ARM::S12: return ARM::S13;
- case ARM::S14: return ARM::S15;
- case ARM::S16: return ARM::S17;
- case ARM::S18: return ARM::S19;
- case ARM::S20: return ARM::S21;
- case ARM::S22: return ARM::S23;
- case ARM::S24: return ARM::S25;
- case ARM::S26: return ARM::S27;
- case ARM::S28: return ARM::S29;
- case ARM::S30: return ARM::S31;
-
- case ARM::D0: return ARM::D1;
- case ARM::D2: return ARM::D3;
- case ARM::D4: return ARM::D5;
- case ARM::D6: return ARM::D7;
- case ARM::D8: return ARM::D9;
- case ARM::D10: return ARM::D11;
- case ARM::D12: return ARM::D13;
- case ARM::D14: return ARM::D15;
- case ARM::D16: return ARM::D17;
- case ARM::D18: return ARM::D19;
- case ARM::D20: return ARM::D21;
- case ARM::D22: return ARM::D23;
- case ARM::D24: return ARM::D25;
- case ARM::D26: return ARM::D27;
- case ARM::D28: return ARM::D29;
- case ARM::D30: return ARM::D31;
- }
-
- return 0;
-}
-
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
void ARMBaseRegisterInfo::
@@ -738,7 +397,7 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
requiresVirtualBaseRegisters(const MachineFunction &MF) const {
- return EnableLocalStackAlloc;
+ return true;
}
static void
@@ -877,8 +536,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
case ARM::VLDRS: case ARM::VLDRD:
case ARM::VSTRS: case ARM::VSTRD:
case ARM::tSTRspi: case ARM::tLDRspi:
- if (ForceAllBaseRegAlloc)
- return true;
break;
default:
return false;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index da29f7e..aaa56a9 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -96,19 +96,10 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- /// canCombineSubRegIndices - Given a register class and a list of
- /// subregister indices, return true if it's possible to combine the
- /// subregister indices into one that corresponds to a larger
- /// subregister. Return the new subregister index by reference. Note the
- /// new index may be zero if the given subregisters can be combined to
- /// form the whole register.
- virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
- SmallVectorImpl<unsigned> &SubIndices,
- unsigned &NewSubIdx) const;
-
const TargetRegisterClass*
getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
const TargetRegisterClass*
@@ -120,12 +111,11 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
- ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const;
-
- unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
- const MachineFunction &MF) const;
+ void getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const;
void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const;
@@ -170,8 +160,6 @@ public:
unsigned MIFlags = MachineInstr::NoFlags)const;
/// Code Generation virtual methods...
- virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
-
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
@@ -186,11 +174,6 @@ public:
virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
-
-private:
- unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
-
- unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 0bd1c3e..e6e8c3d 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -18,8 +18,8 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index bda1517..b378b96 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -190,6 +190,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
// Callee-saved register lists.
//===----------------------------------------------------------------------===//
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8))>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 89cbc84..95decfe 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -14,16 +14,13 @@
#define DEBUG_TYPE "jit"
#include "ARM.h"
-#include "ARMConstantPoolValue.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -31,7 +28,10 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -47,7 +47,7 @@ namespace {
class ARMCodeEmitter : public MachineFunctionPass {
ARMJITInfo *JTI;
const ARMBaseInstrInfo *II;
- const TargetData *TD;
+ const DataLayout *TD;
const ARMSubtarget *Subtarget;
TargetMachine &TM;
JITCodeEmitter &MCE;
@@ -67,7 +67,7 @@ namespace {
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
: MachineFunctionPass(ID), JTI(0),
II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
- TD(tm.getTargetData()), TM(tm),
+ TD(tm.getDataLayout()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
@@ -392,12 +392,16 @@ FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
}
bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
- MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget());
+
+  assert((Target.getRelocationModel() == Reloc::Default ||
+          Target.getRelocationModel() == Reloc::Static) &&
"JIT relocation model must be set to static or default!");
- JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo();
- II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo();
- TD = MF.getTarget().getTargetData();
+
+ JTI = static_cast<ARMJITInfo*>(Target.getJITInfo());
+ II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo());
+ TD = Target.getDataLayout();
+
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index dd05f0c..70a25c2 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -16,23 +16,23 @@
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
@@ -528,7 +528,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF->getTarget().getTargetData();
+ const DataLayout &TD = *MF->getTarget().getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index fa3226e..4e703ec 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -13,11 +13,11 @@
#include "ARMConstantPoolValue.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/Constant.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Type.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
@@ -206,11 +206,7 @@ ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
bool AddCurrentAddress)
: ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
AddCurrentAddress),
- S(strdup(s)) {}
-
-ARMConstantPoolSymbol::~ARMConstantPoolSymbol() {
- free((void*)S);
-}
+ S(s) {}
ARMConstantPoolSymbol *
ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
@@ -218,14 +214,6 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
}
-static bool CPV_streq(const char *S1, const char *S2) {
- if (S1 == S2)
- return true;
- if (S1 && S2 && strcmp(S1, S2) == 0)
- return true;
- return false;
-}
-
int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
@@ -238,7 +226,7 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
if (!APS) continue;
- if (CPV_streq(APS->S, S) && equals(APS))
+ if (APS->S == S && equals(APS))
return i;
}
}
@@ -248,12 +236,11 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV);
- return ACPS && CPV_streq(ACPS->S, S) &&
- ARMConstantPoolValue::hasSameValue(ACPV);
+ return ACPS && ACPS->S == S && ARMConstantPoolValue::hasSameValue(ACPV);
}
void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
- ID.AddPointer(S);
+ ID.AddString(S);
ARMConstantPoolValue::addSelectionDAGCSEId(ID);
}
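The std::string migration above also changes the CSE key: the old ID.AddPointer(S) hashed pointer identity, so two pool symbols wrapping equal but separately allocated strings never produced matching FoldingSet IDs, even though CPV_streq compared them equal. A standalone sketch of the property AddString restores (editorial example, not from the patch):

    #include "llvm/ADT/FoldingSet.h"
    #include <cassert>
    #include <string>
    using namespace llvm;

    int main() {
      FoldingSetNodeID A, B;
      A.AddString("memcpy");
      B.AddString(std::string("mem") + "cpy"); // same text, different storage
      assert(A == B && "equal contents now yield equal CSE ids");
      return 0;
    }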
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 6b98d44..93812fe 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -102,8 +102,6 @@ public:
virtual void print(raw_ostream &O) const;
void print(raw_ostream *O) const { if (O) print(*O); }
void dump() const;
-
- static bool classof(const ARMConstantPoolValue *) { return true; }
};
inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
@@ -158,25 +156,22 @@ public:
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
}
- static bool classof(const ARMConstantPoolConstant *) { return true; }
};
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
/// symbols.
class ARMConstantPoolSymbol : public ARMConstantPoolValue {
- const char *S; // ExtSymbol being loaded.
+ const std::string S; // ExtSymbol being loaded.
ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
public:
- ~ARMConstantPoolSymbol();
-
static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
unsigned ID, unsigned char PCAdj);
- const char *getSymbol() const { return S; }
+ const char *getSymbol() const { return S.c_str(); }
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
@@ -192,7 +187,6 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isExtSymbol();
}
- static bool classof(const ARMConstantPoolSymbol *) { return true; }
};
/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
@@ -225,7 +219,6 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isMachineBasicBlock();
}
- static bool classof(const ARMConstantPoolMBB *) { return true; }
};
} // End llvm namespace
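
The deleted classof overloads were the always-true self-predicates that older isa<>/dyn_cast<> machinery consulted; after the template cleanup only the predicate taking the base pointer is needed. A toy model of the surviving idiom (hypothetical types, not the real hierarchy):

    #include <cassert>

    // Each subclass keeps a single classof taking the base pointer; the
    // always-true overload for its own type is gone.
    struct BaseCPV {
      enum Kind { K_ExtSymbol, K_MBB } TheKind;
      explicit BaseCPV(Kind K) : TheKind(K) {}
    };

    struct SymbolCPV : BaseCPV {
      SymbolCPV() : BaseCPV(K_ExtSymbol) {}
      static bool classof(const BaseCPV *V) { return V->TheKind == K_ExtSymbol; }
    };

    int main() {
      SymbolCPV S;
      const BaseCPV *V = &S;
      assert(SymbolCPV::classof(V));  // what isa<SymbolCPV>(V) reduces to
    }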
diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp
deleted file mode 100644
index f671317..0000000
--- a/lib/Target/ARM/ARMELFWriterInfo.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF writer information for the ARM backend.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARMELFWriterInfo.h"
-#include "ARMRelocations.h"
-#include "llvm/Function.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/ELF.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Implementation of the ARMELFWriterInfo class
-//===----------------------------------------------------------------------===//
-
-ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
- : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
- TM.getTargetData()->isLittleEndian()) {
-}
-
-ARMELFWriterInfo::~ARMELFWriterInfo() {}
-
-unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
- switch (MachineRelTy) {
- case ARM::reloc_arm_absolute:
- case ARM::reloc_arm_relative:
- case ARM::reloc_arm_cp_entry:
- case ARM::reloc_arm_vfp_cp_entry:
- case ARM::reloc_arm_machine_cp_entry:
- case ARM::reloc_arm_jt_base:
- case ARM::reloc_arm_pic_jt:
- llvm_unreachable("unsupported ARM relocation type");
-
- case ARM::reloc_arm_branch: return ELF::R_ARM_CALL;
- case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS;
- case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC;
- default:
- llvm_unreachable("unknown ARM relocation type");
- }
-}
-
-long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
- long int Modifier) const {
- llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not "
- "implemented");
-}
-
-unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
- llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented");
-}
-
-bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
- llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented");
-}
-
-unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
- llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
- "implemented");
-}
-
-long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
- unsigned RelOffset,
- unsigned RelTy) const {
- llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
- "implemented");
-}
diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h
deleted file mode 100644
index 6a84f8a..0000000
--- a/lib/Target/ARM/ARMELFWriterInfo.h
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF writer information for the ARM backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_ELF_WRITER_INFO_H
-#define ARM_ELF_WRITER_INFO_H
-
-#include "llvm/Target/TargetELFWriterInfo.h"
-
-namespace llvm {
- class TargetMachine;
-
- class ARMELFWriterInfo : public TargetELFWriterInfo {
- public:
- ARMELFWriterInfo(TargetMachine &TM);
- virtual ~ARMELFWriterInfo();
-
- /// getRelocationType - Returns the target specific ELF Relocation type.
- /// 'MachineRelTy' contains the object code independent relocation type
- virtual unsigned getRelocationType(unsigned MachineRelTy) const;
-
- /// hasRelocationAddend - True if the target uses an addend in the
- /// ELF relocation entry.
- virtual bool hasRelocationAddend() const { return false; }
-
- /// getDefaultAddendForRelTy - Gets the default addend value for a
- /// relocation entry based on the target ELF relocation type.
- virtual long int getDefaultAddendForRelTy(unsigned RelTy,
- long int Modifier = 0) const;
-
- /// getRelTySize - Returns the size of relocatable field in bits
- virtual unsigned getRelocationTySize(unsigned RelTy) const;
-
- /// isPCRelativeRel - True if the relocation type is pc relative
- virtual bool isPCRelativeRel(unsigned RelTy) const;
-
- /// getJumpTableRelocationTy - Returns the machine relocation type used
- /// to reference a jumptable.
- virtual unsigned getAbsoluteLabelMachineRelTy() const;
-
- /// computeRelocation - Some relocatable fields could be relocated
- /// directly, avoiding the relocation symbol emission, compute the
- /// final relocation value for this symbol.
- virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
- unsigned RelTy) const;
- };
-
-} // end llvm namespace
-
-#endif // ARM_ELF_WRITER_INFO_H
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 8ed6b75..beb843c 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,10 +23,10 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
@@ -103,9 +103,9 @@ namespace {
bool IsLoad;
bool isUpdating;
bool hasWritebackOperand;
- NEONRegSpacing RegSpacing;
- unsigned char NumRegs; // D registers loaded or stored
- unsigned char RegElts; // elements per D register; used for lane ops
+ uint8_t RegSpacing; // Holds a NEONRegSpacing value
+ uint8_t NumRegs; // D registers loaded or stored
+ uint8_t RegElts; // elements per D register; used for lane ops
// FIXME: Temporary flag to denote whether the real instruction takes
// a single register (like the encoding) or all of the registers in
// the list (like the asm syntax and the isel DAG). When all definitions
@@ -377,7 +377,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
- NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
@@ -442,7 +442,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
- NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
@@ -493,7 +493,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && "NEONLdStTable lookup failed");
- NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
unsigned RegElts = TableEntry->RegElts;
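
Narrowing RegSpacing to uint8_t packs each statically initialized NEONLdStTableEntry tighter (an enum field is typically int-sized); the cost is the explicit cast back to NEONRegSpacing at every use site, as the three hunks above show. A self-contained sketch of the trade-off (toy entry, not the real table):

    #include <cstdint>

    enum NEONRegSpacing { SingleSpc, EvenDblSpc, OddDblSpc };

    // Toy analogue of the packed table entry: three byte-wide fields.
    struct PackedEntry {
      uint8_t RegSpacing;   // holds a NEONRegSpacing value
      uint8_t NumRegs;
      uint8_t RegElts;
    };
    static_assert(sizeof(PackedEntry) == 3, "no padding between bytes");

    NEONRegSpacing spacingOf(const PackedEntry &E) {
      return (NEONRegSpacing)E.RegSpacing;  // cast back at the use site
    }

    int main() { return spacingOf({EvenDblSpc, 2, 4}) == EvenDblSpc ? 0 : 1; }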
@@ -777,9 +777,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM::Int_eh_sjlj_dispatchsetup:
- case ARM::Int_eh_sjlj_dispatchsetup_nofp:
- case ARM::tInt_eh_sjlj_dispatchsetup: {
+ case ARM::Int_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
static_cast<const ARMBaseInstrInfo*>(TII);
@@ -1208,57 +1206,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
- case ARM::VSETLNi8Q:
- case ARM::VSETLNi16Q: {
- // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting
- // on the respective D register.
-
- unsigned QReg = MI.getOperand(1).getReg();
- unsigned QLane = MI.getOperand(3).getImm();
-
- unsigned NewOpcode, DLane, DSubReg;
- switch (Opcode) {
- default: llvm_unreachable("Invalid opcode!");
- case ARM::VSETLNi8Q:
- // 8 possible 8-bit lanes per DPR:
- NewOpcode = ARM::VSETLNi8;
- DLane = QLane % 8;
- DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0;
- break;
- case ARM::VSETLNi16Q:
- // 4 possible 16-bit lanes per DPR.
- NewOpcode = ARM::VSETLNi16;
- DLane = QLane % 4;
- DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0;
- break;
- }
-
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode));
-
- unsigned DReg = TRI->getSubReg(QReg, DSubReg);
-
- MIB.addReg(DReg, RegState::Define); // Output DPR
- MIB.addReg(DReg); // Input DPR
- MIB.addOperand(MI.getOperand(2)); // Input GPR
- MIB.addImm(DLane); // Lane
-
- // Add the predicate operands.
- MIB.addOperand(MI.getOperand(4));
- MIB.addOperand(MI.getOperand(5));
-
- if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register.
- MIB->addRegisterKilled(QReg, TRI, true);
- // And an implicit def of the output register (which should always be the
- // same as the input register).
- MIB->addRegisterDefined(QReg, TRI);
-
- TransferImpOps(MI, MIB, MIB);
-
- MI.eraseFromParent();
- return true;
- }
-
case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 045d904..94c574a 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -16,31 +16,31 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
-#include "ARMTargetMachine.h"
-#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -178,22 +178,24 @@ class ARMFastISel : public FastISel {
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
- void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
+ void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
- bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
- unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
- unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
- unsigned ARMMaterializeInt(const Constant *C, EVT VT);
- unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
- unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
- unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
+ unsigned Alignment);
+ unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, MVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
unsigned ARMSelectCallOp(bool UseReg);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
// Call handling routines.
private:
@@ -219,7 +221,7 @@ class ARMFastISel : public FastISel {
bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
- void AddLoadStoreOperands(EVT VT, Address &Addr,
+ void AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags, bool useAM3);
};
@@ -485,7 +487,7 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
-unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::f64) return 0;
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
@@ -495,7 +497,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
return MoveReg;
}
-unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::i64) return 0;
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
@@ -508,7 +510,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
-unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
const APFloat Val = CFP->getValueAPF();
bool is64bit = VT == MVT::f64;
@@ -552,7 +554,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
return DestReg;
}
-unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return false;
@@ -562,7 +564,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
const ConstantInt *CI = cast<ConstantInt>(C);
if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
- unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
+ &ARM::GPRRegClass;
+ unsigned ImmReg = createResultReg(RC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ImmReg)
.addImm(CI->getZExtValue()));
@@ -612,13 +616,16 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
return DestReg;
}
-unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
if (VT != MVT::i32) return 0;
Reloc::Model RelocM = TM.getRelocationModel();
bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ const TargetRegisterClass *RC = isThumb2 ?
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
+ (const TargetRegisterClass*)&ARM::GPRRegClass;
+ unsigned DestReg = createResultReg(RC);
// Use movw+movt when possible, it avoids constant pool entries.
// Darwin targets don't support movt with Reloc::Static, see
@@ -648,6 +655,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
Align = TD.getTypeAllocSize(GV->getType());
}
+ if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
+ return ARMLowerPICELF(GV, Align, VT);
+
// Grab index.
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
(Subtarget->isThumb() ? 4 : 8);
@@ -709,10 +719,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
}
unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
- EVT VT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
- if (!VT.isSimple()) return 0;
+ if (!CEVT.isSimple()) return 0;
+ MVT VT = CEVT.getSimpleVT();
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return ARMMaterializeFP(CFP, VT);
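
The EVT-to-MVT refactoring across this file follows one pattern, visible in the hunk above: check EVT::isSimple() once at the entry point, bail out to SelectionDAG if the type is extended, and hand every helper a plain MVT it can switch on directly. A standalone toy model of that boundary check (the enum and types are illustrative, not LLVM's):

    #include <cassert>
    #include <optional>

    enum class SimpleVT { i8, i16, i32, f32, f64 };

    // Toy EVT: either a simple machine type or an extended (illegal) one.
    struct ToyEVT {
      std::optional<SimpleVT> Simple;
      bool isSimple() const { return Simple.has_value(); }
      SimpleVT getSimpleVT() const { return *Simple; }
    };

    bool materialize(SimpleVT VT) { return VT == SimpleVT::i32; }

    // The boundary: narrow once, then every callee takes the simple type.
    bool entryPoint(const ToyEVT &VTE) {
      if (!VTE.isSimple()) return false;   // let SelectionDAG handle it
      return materialize(VTE.getSimpleVT());
    }

    int main() {
      assert(entryPoint(ToyEVT{SimpleVT::i32}));
      assert(!entryPoint(ToyEVT{}));       // extended type: bail out
    }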
@@ -888,12 +899,9 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
return Addr.Base.Reg != 0;
}
-void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
-
- assert(VT.isSimple() && "Non-simple types are invalid here!");
-
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
bool needsLowering = false;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: llvm_unreachable("Unhandled load/store type!");
case MVT::i1:
case MVT::i8:
@@ -944,13 +952,12 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
}
}
-void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags, bool useAM3) {
// addrmode5 output depends on the selection dag addressing dividing the
// offset by 4 that it then later multiplies. Do this here as well.
- if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
- VT.getSimpleVT().SimpleTy == MVT::f64)
+ if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
Addr.Offset /= 4;
// Frame base works a bit differently. Handle it separately.
@@ -993,14 +1000,13 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
AddOptionalDefs(MIB);
}
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment, bool isZExt, bool allocReg) {
- assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
bool useAM3 = false;
bool needVMOV = false;
const TargetRegisterClass *RC;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1:
@@ -1021,6 +1027,9 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
RC = &ARM::GPRRegClass;
break;
case MVT::i16:
+ if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ return false;
+
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
@@ -1033,6 +1042,9 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
RC = &ARM::GPRRegClass;
break;
case MVT::i32:
+ if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ return false;
+
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
Opc = ARM::t2LDRi8;
@@ -1111,11 +1123,11 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment) {
unsigned StrOpc;
bool useAM3 = false;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
@@ -1139,6 +1151,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::i16:
+ if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ return false;
+
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
StrOpc = ARM::t2STRHi8;
@@ -1150,6 +1165,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::i32:
+ if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ return false;
+
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
StrOpc = ARM::t2STRi8;
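
Both the load and store hunks add the same guard: a halfword access needs 2-byte alignment and a word access 4-byte alignment unless the subtarget tolerates unaligned memory; Alignment == 0 means the value carries its natural ABI alignment and always passes. A small runnable model of the predicate (the function name is hypothetical):

    #include <cassert>

    // Size is the natural alignment of the access (2 for i16, 4 for i32);
    // Align == 0 means "ABI / natural alignment", which always qualifies.
    bool fastIselCanAccess(unsigned Size, unsigned Align, bool UnalignedOK) {
      if (Align && Align < Size && !UnalignedOK)
        return false;              // punt to SelectionDAG lowering
      return true;
    }

    int main() {
      assert(!fastIselCanAccess(4, 1, /*UnalignedOK=*/false)); // packed word
      assert( fastIselCanAccess(4, 1, /*UnalignedOK=*/true));  // v7 allows it
      assert( fastIselCanAccess(2, 0, false));                 // ABI-aligned
    }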
@@ -1372,14 +1390,20 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
.addReg(AddrReg));
+
+ const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+ for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+
return true;
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt) {
Type *Ty = Src1Value->getType();
- EVT SrcVT = TLI.getValueType(Ty, true);
- if (!SrcVT.isSimple()) return false;
+ EVT SrcEVT = TLI.getValueType(Ty, true);
+ if (!SrcEVT.isSimple()) return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
if (isFloat && !Subtarget->hasVFP2())
@@ -1416,7 +1440,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
unsigned CmpOpc;
bool isICmp = true;
bool needsExt = false;
- switch (SrcVT.getSimpleVT().SimpleTy) {
+ switch (SrcVT.SimpleTy) {
default: return false;
// TODO: Verify compares.
case MVT::f32:
@@ -1568,7 +1592,10 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
return false;
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
@@ -1577,8 +1604,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
- EVT DestVT = MVT::i32;
- SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT,
+ SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
/*isZExt*/!isSigned);
if (SrcReg == 0) return false;
}
@@ -1641,7 +1667,6 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
// Things need to be register sized for register moves.
if (VT != MVT::i32) return false;
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned CondReg = getRegForValue(I->getOperand(0));
if (CondReg == 0) return false;
@@ -1674,14 +1699,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
.addReg(CondReg).addImm(0));
unsigned MovCCOpc;
+ const TargetRegisterClass *RC;
if (!UseImm) {
+ RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
} else {
- if (!isNegativeImm) {
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
+ if (!isNegativeImm)
MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
- } else {
+ else
MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
- }
}
unsigned ResultReg = createResultReg(RC);
if (!UseImm)
@@ -1783,7 +1810,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
- EVT VT = TLI.getValueType(I->getType(), true);
+ EVT FPVT = TLI.getValueType(I->getType(), true);
+ if (!FPVT.isSimple()) return false;
+ MVT VT = FPVT.getSimpleVT();
// We can get here in the case when we want to use NEON for our fp
// operations, but can't figure out how to. Just use the vfp instructions
@@ -1814,7 +1843,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
unsigned Op2 = getRegForValue(I->getOperand(1));
if (Op2 == 0) return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addReg(Op1).addReg(Op2));
@@ -2027,7 +2056,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
if (RVLocs.size() == 2 && RetVT == MVT::f64) {
// For this move we copy into two registers and then move into the
// double fp reg we want.
- EVT DestVT = RVLocs[0].getValVT();
+ MVT DestVT = RVLocs[0].getValVT();
const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
unsigned ResultReg = createResultReg(DstRC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -2042,7 +2071,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
UpdateValueMap(I, ResultReg);
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
- EVT CopyVT = RVLocs[0].getValVT();
+ MVT CopyVT = RVLocs[0].getValVT();
// Special handling for extended integers.
if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
@@ -2073,8 +2102,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -2101,8 +2129,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- EVT RVVT = TLI.getValueType(RV->getType());
- EVT DestVT = VA.getValVT();
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple()) return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
@@ -2147,7 +2177,9 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
GlobalValue::ExternalLinkage, 0, Name);
- return ARMMaterializeGV(GV, TLI.getValueType(GV->getType()));
+ EVT LCREVT = TLI.getValueType(GV->getType());
+ if (!LCREVT.isSimple()) return 0;
+ return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}
// A quick function that will emit a call for a named libcall in F with the
@@ -2256,6 +2288,9 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Can't handle inline asm.
if (isa<InlineAsm>(Callee)) return false;
+ // Allow SelectionDAG isel to handle tail calls.
+ if (CI->isTailCall()) return false;
+
// Check the calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
@@ -2395,21 +2430,30 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
}
bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
- uint64_t Len) {
+ uint64_t Len, unsigned Alignment) {
// Make sure we don't bloat code by inlining very large memcpy's.
if (!ARMIsMemCpySmall(Len))
return false;
- // We don't care about alignment here since we just emit integer accesses.
while (Len) {
MVT VT;
- if (Len >= 4)
- VT = MVT::i32;
- else if (Len >= 2)
- VT = MVT::i16;
- else {
- assert(Len == 1);
- VT = MVT::i8;
+ if (!Alignment || Alignment >= 4) {
+ if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert (Len == 1 && "Expected a length of 1!");
+ VT = MVT::i8;
+ }
+ } else {
+ // Bound based on alignment.
+ if (Len >= 2 && Alignment == 2)
+ VT = MVT::i16;
+ else {
+ assert (Alignment == 1 && "Expected an alignment of 1!");
+ VT = MVT::i8;
+ }
}
bool RV;
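
The effect of the new Alignment parameter: the inline memcpy expansion now picks the widest access that both fits the remaining length and respects the known alignment, instead of unconditionally emitting word-sized operations. A runnable sketch of the chunk-size choice and its consequence for a 2-byte-aligned copy:

    #include <cassert>
    #include <cstdint>

    // Mirrors the selection above: word if alignment permits, else the
    // widest access the alignment allows (0 means at-least-word aligned).
    unsigned nextChunk(uint64_t Len, unsigned Align) {
      if (!Align || Align >= 4)
        return Len >= 4 ? 4 : (Len >= 2 ? 2 : 1);
      if (Align == 2)
        return Len >= 2 ? 2 : 1;
      return 1;                          // Align == 1: bytes only
    }

    int main() {
      uint64_t Len = 10; unsigned Ops = 0;
      while (Len) { Len -= nextChunk(Len, 2); ++Ops; }
      assert(Ops == 5);                  // five halfword copies, no word op
    }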
@@ -2488,7 +2532,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
!ARMComputeAddress(MTI.getRawSource(), Src))
return false;
- if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
+ unsigned Alignment = MTI.getAlignment();
+ if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
}
}
@@ -2546,18 +2591,19 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {
return true;
}
-unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
+unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
return 0;
unsigned Opc;
bool isBoolZext = false;
- if (!SrcVT.isSimple()) return 0;
- switch (SrcVT.getSimpleVT().SimpleTy) {
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ switch (SrcVT.SimpleTy) {
default: return 0;
case MVT::i16:
if (!Subtarget->hasV6Ops()) return 0;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
if (isZExt)
Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
else
@@ -2565,6 +2611,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
break;
case MVT::i8:
if (!Subtarget->hasV6Ops()) return 0;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
if (isZExt)
Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
else
@@ -2572,6 +2619,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
break;
case MVT::i1:
if (isZExt) {
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
isBoolZext = true;
break;
@@ -2579,7 +2627,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
return 0;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addReg(SrcReg);
@@ -2598,14 +2646,18 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
- EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(SrcTy, true);
- DestVT = TLI.getValueType(DestTy, true);
-
bool isZExt = isa<ZExtInst>(I);
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg) return false;
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple()) return false;
+ if (!DestEVT.isSimple()) return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
if (ResultReg == 0) return false;
UpdateValueMap(I, ResultReg);
@@ -2784,6 +2836,47 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
return true;
}
+unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
+ unsigned Align, MVT VT) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolConstant *CPV =
+ ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ unsigned Opc;
+ unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
+ // Load value.
+ if (isThumb2) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg1)
+ .addConstantPoolIndex(Idx));
+ Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
+ } else {
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(ARM::LDRcp), DestReg1)
+ .addConstantPoolIndex(Idx).addImm(0));
+ Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
+ }
+
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ if (GlobalBaseReg == 0) {
+ GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
+ AFI->setGlobalBaseReg(GlobalBaseReg);
+ }
+
+ unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), DestReg2)
+ .addReg(DestReg1)
+ .addReg(GlobalBaseReg);
+ if (!UseGOTOFF)
+ MIB.addImm(0);
+ AddOptionalDefs(MIB);
+
+ return DestReg2;
+}
+
namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
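
What ARMLowerPICELF emits, in arithmetic terms: the first load fetches a constant-pool entry holding either the symbol's GOTOFF offset or its GOT slot offset. For local or hidden symbols (GOTOFF), one add against the global base register yields the address; for preemptible symbols (GOT), a second load reads the pointer out of the GOT. A toy model of the two forms (flat array standing in for memory; names made up):

    #include <cassert>
    #include <cstdint>

    // GOTOFF: address = base + offset (the ADDrr / t2ADDrr above).
    uint32_t viaGOTOFF(uint32_t GlobalBase, uint32_t CPEntry) {
      return GlobalBase + CPEntry;
    }

    // GOT: address = *(base + offset) (the LDRrs / t2LDRs above).
    uint32_t viaGOT(const uint32_t *Mem, uint32_t GlobalBase, uint32_t CPEntry) {
      return Mem[(GlobalBase + CPEntry) / 4];   // word-indexed toy memory
    }

    int main() {
      uint32_t Mem[8] = {0, 0, 0xDEADBEEF, 0};  // GOT slot at word 2
      assert(viaGOTOFF(4, 4) == 8);
      assert(viaGOT(Mem, 4, 4) == 0xDEADBEEF);
    }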
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index aee72d2..39d27c4 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -15,17 +15,16 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -153,7 +152,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
int FramePtrSpillFI = 0;
int D8SpillFI = 0;
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
@@ -360,7 +360,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
@@ -694,7 +695,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet) {
- MIB->copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(&*MI);
MI->eraseFromParent();
}
MI = MIB;
@@ -1151,7 +1152,8 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Naked functions don't spill callee-saved registers.
- if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return;
// We are planning to use NEON instructions vst1 / vld1.
@@ -1176,7 +1178,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumSpills = 0;
for (; NumSpills < 8; ++NumSpills)
- if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills))
+ if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
break;
// Don't do this for just one d-register. It's not worth it.
@@ -1209,6 +1211,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
*static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
@@ -1218,12 +1221,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: It will be better just to find spare register here.
if (AFI->isThumb2Function() &&
(MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
- MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ MRI.setPhysRegUsed(ARM::R4);
if (AFI->isThumb1OnlyFunction()) {
// Spill LR if Thumb1 function uses variable length argument lists.
if (AFI->getVarArgsRegSaveSize() > 0)
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ MRI.setPhysRegUsed(ARM::LR);
// Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
// for sure what the stack size will be, but for this, an estimate is good
@@ -1233,7 +1236,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: It will be better just to find spare register here.
unsigned StackSize = estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
- MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ MRI.setPhysRegUsed(ARM::R4);
}
// See if we can spill vector registers to aligned stack.
@@ -1241,7 +1244,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+ MRI.setPhysRegUsed(RegInfo->getBaseRegister());
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
@@ -1249,7 +1252,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
+ if (MRI.isPhysRegUsed(Reg)) {
Spilled = true;
CanEliminateFrame = false;
}
@@ -1338,7 +1341,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
// Spill LR as well so we can fold BX_RET to the registers restore (LDM).
if (!LRSpilled && CS1Spilled) {
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ MRI.setPhysRegUsed(ARM::LR);
NumGPRSpills++;
UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
@@ -1347,7 +1350,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (hasFP(MF)) {
- MF.getRegInfo().setPhysRegUsed(FramePtr);
+ MRI.setPhysRegUsed(FramePtr);
NumGPRSpills++;
}
@@ -1362,16 +1365,16 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill high register if the function is thumb1
if (!AFI->isThumb1OnlyFunction() ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
- MF.getRegInfo().setPhysRegUsed(Reg);
- if (!RegInfo->isReservedReg(MF, Reg))
+ MRI.setPhysRegUsed(Reg);
+ if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
break;
}
}
} else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
unsigned Reg = UnspilledCS2GPRs.front();
- MF.getRegInfo().setPhysRegUsed(Reg);
- if (!RegInfo->isReservedReg(MF, Reg))
+ MRI.setPhysRegUsed(Reg);
+ if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
}
}
@@ -1389,7 +1392,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
while (NumExtras && !UnspilledCS1GPRs.empty()) {
unsigned Reg = UnspilledCS1GPRs.back();
UnspilledCS1GPRs.pop_back();
- if (!RegInfo->isReservedReg(MF, Reg) &&
+ if (!MRI.isReserved(Reg) &&
(!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
Reg == ARM::LR)) {
Extras.push_back(Reg);
@@ -1401,7 +1404,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
while (NumExtras && !UnspilledCS2GPRs.empty()) {
unsigned Reg = UnspilledCS2GPRs.back();
UnspilledCS2GPRs.pop_back();
- if (!RegInfo->isReservedReg(MF, Reg)) {
+ if (!MRI.isReserved(Reg)) {
Extras.push_back(Reg);
NumExtras--;
}
@@ -1409,7 +1412,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (Extras.size() && NumExtras == 0) {
for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
- MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ MRI.setPhysRegUsed(Extras[i]);
}
} else if (!AFI->isThumb1OnlyFunction()) {
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
@@ -1423,7 +1426,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (ForceLRSpill) {
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ MRI.setPhysRegUsed(ARM::LR);
AFI->setLRIsSpilledForFarJump(true);
}
}
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index a5fd15b..1240169 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -47,7 +47,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
+ !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1406620..939bed7 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -16,24 +16,24 @@
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -78,6 +78,8 @@ public:
return "ARM Instruction Selection";
}
+ virtual void PreprocessISelDAG();
+
/// getI32Imm - Return a target constant of type i32 with the specified
/// value.
inline SDValue getI32Imm(unsigned Imm) {
@@ -265,15 +267,16 @@ private:
char ConstraintCode,
std::vector<SDValue> &OutOps);
- // Form pairs of consecutive S, D, or Q registers.
- SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
- SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
- SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+ // Form pairs of consecutive R, S, D, or Q registers.
+ SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
// Form sequences of 4 consecutive S, D, or Q registers.
- SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
// Get the alignment operand for a NEON VLD or VST instruction.
SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
@@ -305,7 +308,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
}
/// \brief Check whether a particular node is a constant value representable as
-/// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax).
+/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
@@ -326,6 +329,87 @@ static bool isScaledConstantInRange(SDValue Node, int Scale,
return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
+void ARMDAGToDAGISel::PreprocessISelDAG() {
+ if (!Subtarget->hasV6T2Ops())
+ return;
+
+ bool isThumb2 = Subtarget->isThumb();
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (N->getOpcode() != ISD::ADD)
+ continue;
+
+ // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
+ // leading zeros, followed by consecutive set bits, followed by 1 or 2
+ // trailing zeros, e.g. 1020.
+ // Transform the expression to
+ // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
+ // of trailing zeros of c2. The left shift would be folded as a shifter
+ // operand of 'add', and the 'and' and 'srl' would become a bit-extraction
+ // node (UBFX).
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned And_imm = 0;
+ if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
+ if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
+ std::swap(N0, N1);
+ }
+ if (!And_imm)
+ continue;
+
+ // Check if the AND mask is an immediate of the form: 000.....1111111100
+ unsigned TZ = CountTrailingZeros_32(And_imm);
+ if (TZ != 1 && TZ != 2)
+ // Be conservative here. Shifter operands aren't always free; e.g. on
+ // Swift, a left shift by 1 or 2 is free but other amounts are not.
+ // e.g.
+ // ubfx r3, r1, #16, #8
+ // ldr.w r3, [r0, r3, lsl #2]
+ // vs.
+ // mov.w r9, #1020
+ // and.w r2, r9, r1, lsr #14
+ // ldr r2, [r0, r2]
+ continue;
+ And_imm >>= TZ;
+ if (And_imm & (And_imm + 1))
+ continue;
+
+ // Look for (and (srl X, c1), c2).
+ SDValue Srl = N1.getOperand(0);
+ unsigned Srl_imm = 0;
+ if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
+ (Srl_imm <= 2))
+ continue;
+
+ // Make sure first operand is not a shifter operand which would prevent
+ // folding of the left shift.
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (isThumb2) {
+ if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+ continue;
+ } else {
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
+ SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
+ continue;
+ }
+
+ // Now make the transformation.
+ Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
+ Srl.getOperand(0),
+ CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
+ N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
+ Srl, CurDAG->getConstant(And_imm, MVT::i32));
+ N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
+ N1, CurDAG->getConstant(TZ, MVT::i32));
+ CurDAG->UpdateNodeOperands(N, N0, N1);
+ }
+}
+
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
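
A worked instance of the PreprocessISelDAG rewrite above, using the comment's example mask c2 = 1020 = 0b1111111100: TZ = 2, 1020 >> 2 = 255, and 255 & 256 == 0, so the mask qualifies. The rewritten form shifts right by c1 + 2, masks with 255 (which isel then matches as UBFX), and re-applies the << 2 as a free shifter operand on the add. The identity can be checked directly:

    #include <cassert>
    #include <cstdint>

    uint32_t before(uint32_t X1, uint32_t X2, unsigned c1) {
      return X1 + ((X2 >> c1) & 1020);              // and + srl + add
    }
    uint32_t after(uint32_t X1, uint32_t X2, unsigned c1) {
      return X1 + (((X2 >> (c1 + 2)) & 255) << 2);  // ubfx + shifter add
    }

    int main() {
      for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 0x01234567)
        assert(before(7, (uint32_t)x, 14) == after(7, (uint32_t)x, 14));
    }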
@@ -336,7 +420,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (!CheckVMLxHazard)
return true;
- if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+ if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() &&
+ !Subtarget->isSwift())
return true;
if (!N->hasOneUse())
@@ -374,12 +459,13 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal,
unsigned ShAmt) {
- if (!Subtarget->isCortexA9())
+ if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
return true;
if (Shift.hasOneUse())
return true;
// R << 2 is free.
- return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+ return ShOpcVal == ARM_AM::lsl &&
+ (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
@@ -486,7 +572,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL &&
- (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@@ -550,7 +636,8 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
- !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
@@ -584,7 +671,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL &&
- (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@@ -650,7 +737,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
}
- if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
// Compute R +/- (R << N) and reuse it.
Base = N;
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -688,7 +775,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
- !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
@@ -1440,9 +1528,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
-/// PairSRegs - Form a D register from a pair of S registers.
-///
-SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
+SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// \brief Form a D register from a pair of S registers.
+SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass =
CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
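
createGPRPairNode follows the same REG_SEQUENCE recipe as its S/D/Q siblings: the operand list is a register-class id followed by (value, subreg-index) pairs, here GPRPairRegClassID with gsub_0/gsub_1. (GPRPair presumably models the even/odd core-register pairs that instructions such as LDREXD/STREXD require; that use is an inference from the class name, not stated in this hunk.) A toy model of the operand layout only:

    #include <cassert>
    #include <utility>
    #include <vector>

    // Shape of the REG_SEQUENCE operands built above: class id, then
    // alternating (value, subreg index) pairs. Ids here are made up.
    struct RegSeq {
      unsigned RegClassID;
      std::vector<std::pair<unsigned, unsigned>> Parts; // (vreg, subidx)
    };

    RegSeq makeGPRPair(unsigned V0, unsigned V1) {
      enum { GPRPairRC = 1, gsub_0 = 10, gsub_1 = 11 };  // toy ids
      return {GPRPairRC, {{V0, gsub_0}, {V1, gsub_1}}};
    }

    int main() {
      RegSeq R = makeGPRPair(100, 101);
      assert(R.Parts.size() == 2 && R.Parts[0].second == 10);
    }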
@@ -1452,9 +1550,8 @@ SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// PairDRegs - Form a quad register from a pair of D registers.
-///
-SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form a quad register from a pair of D registers.
+SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
@@ -1463,9 +1560,8 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
-///
-SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form 4 consecutive D registers from a pair of Q registers.
+SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
@@ -1474,9 +1570,8 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// QuadSRegs - Form 4 consecutive S registers.
-///
-SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive S registers.
+SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass =
@@ -1490,9 +1585,8 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
-/// QuadDRegs - Form 4 consecutive D registers.
-///
-SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive D registers.
+SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
@@ -1505,9 +1599,8 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
-/// QuadQRegs - Form 4 consecutive Q registers.
-///
-SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive Q registers.
+SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
@@ -1780,7 +1873,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V0 = N->getOperand(Vec0Idx + 0);
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2)
- SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
// If it's a vst3, form a quad D-register and leave the last part as
@@ -1788,13 +1881,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(Vec0Idx + 3);
- SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
} else {
// Form a QQ register.
SDValue Q0 = N->getOperand(Vec0Idx);
SDValue Q1 = N->getOperand(Vec0Idx + 1);
- SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+ SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
}
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
@@ -1836,7 +1929,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
- SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers. This is always an updating store, so that it
// provides the address to the second store for the odd subregs.
@@ -1946,18 +2039,18 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2) {
if (is64BitVector)
- SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else
- SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
} else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
if (is64BitVector)
- SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
else
- SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
}
Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
@@ -2083,7 +2176,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDValue V0 = N->getOperand(FirstTblReg + 0);
SDValue V1 = N->getOperand(FirstTblReg + 1);
if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
else {
SDValue V2 = N->getOperand(FirstTblReg + 2);
// If it's a vtbl3, form a quad D-register and leave the last part as
@@ -2091,7 +2184,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(FirstTblReg + 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
SmallVector<SDValue, 6> Ops;
@@ -2109,10 +2202,10 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
if (!Subtarget->hasV6T2Ops())
return NULL;
- unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ unsigned Opc = isSigned
+ ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
: (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
-
// For unsigned extracts, check for a shift right and mask
unsigned And_imm = 0;
if (N->getOpcode() == ISD::AND) {
@@ -2130,7 +2223,29 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
// Note: The width operand is encoded as width-1.
unsigned Width = CountTrailingOnes_32(And_imm) - 1;
unsigned LSB = Srl_imm;
+
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
+ // It's cheaper to use a right shift to extract the top bits.
+ if (Subtarget->isThumb()) {
+ Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ // ARM models shift instructions as MOVsi with shifter operand.
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
+ SDValue ShOpc =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
+ MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
+ }
+
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
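
The early-out added above rests on a simple identity: when the extracted field ends at the register's top bit (LSB plus the real field width equals 32; recall the Width operand is encoded as width-1), UBFX/SBFX degenerate to a plain logical/arithmetic right shift. A standalone sanity check of the unsigned case, no LLVM dependencies:

    #include <cassert>
    #include <cstdint>

    // ubfx(x, lsb, width) == x >> lsb whenever lsb + width == 32.
    static uint32_t ubfx(uint32_t x, unsigned lsb, unsigned width) {
      return (x >> lsb) & ((1u << width) - 1);   // width < 32 here
    }

    int main() {
      for (unsigned lsb = 1; lsb < 32; ++lsb) {
        unsigned width = 32 - lsb;               // field reaches bit 31
        uint32_t x = 0xDEADBEEF;
        assert(ubfx(x, lsb, width) == (x >> lsb));
      }
      return 0;
    }
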
@@ -2407,7 +2522,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() || N->getNumOperands() != 2)
llvm_unreachable("unexpected CONCAT_VECTORS");
- return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
+ return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
@@ -2786,13 +2901,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned NumElts = VecVT.getVectorNumElements();
if (EltVT == MVT::f64) {
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
- return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
}
assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
if (NumElts == 2)
- return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
- return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3));
}
@@ -3005,17 +3120,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDValue Chain = N->getOperand(0);
- unsigned NewOpc = ARM::LDREXD;
- if (Subtarget->isThumb() && Subtarget->hasThumb2())
- NewOpc = ARM::t2LDREXD;
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ unsigned NewOpc = isThumb ? ARM::t2LDREXD : ARM::LDREXD;
// arm_ldrexd returns an i64 value in {i32, i32}
std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i32);
- ResTys.push_back(MVT::i32);
+ if (isThumb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ } else
+ ResTys.push_back(MVT::Untyped);
ResTys.push_back(MVT::Other);
- // place arguments in the right order
+ // Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
@@ -3028,30 +3145,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
- // Until there's support for specifing explicit register constraints
- // like the use of even/odd register pair, hardcode ldrexd to always
- // use the pair [R0, R1] to hold the load result.
- Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0,
- SDValue(Ld, 0), SDValue(0,0));
- Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1,
- SDValue(Ld, 1), Chain.getValue(1));
-
// Remap uses.
- SDValue Glue = Chain.getValue(1);
+ SDValue Glue = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
if (!SDValue(N, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R0, MVT::i32, Glue);
- Glue = Result.getValue(2);
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 0);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue);
+ Result = SDValue(ResNode,0);
+ Glue = Result.getValue(1);
+ }
ReplaceUses(SDValue(N, 0), Result);
}
if (!SDValue(N, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R1, MVT::i32, Glue);
- Glue = Result.getValue(2);
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 1);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue);
+ Result = SDValue(ResNode,0);
+ Glue = Result.getValue(1);
+ }
ReplaceUses(SDValue(N, 1), Result);
}
-
- ReplaceUses(SDValue(N, 2), SDValue(Ld, 2));
+ ReplaceUses(SDValue(N, 2), Glue);
return NULL;
}
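
createGPRPairNode, used by the strexd hunk below, is not shown in this diff; by analogy with the D/S/Q pair helpers above it presumably forms an Untyped REG_SEQUENCE over the GPRPair class, along these lines (a reconstruction, not quoted from the patch):

    // Hedged reconstruction: pair two i32 GPRs into one Untyped GPRPair.
    SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
      DebugLoc dl = V0.getNode()->getDebugLoc();
      SDValue RegClass =
        CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
      SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
      SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
      const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
      return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
    }
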
@@ -3062,38 +3184,27 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Val1 = N->getOperand(3);
SDValue MemAddr = N->getOperand(4);
- // Until there's support for specifing explicit register constraints
- // like the use of even/odd register pair, hardcode strexd to always
- // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored.
- Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0,
- SDValue(0, 0));
- Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1));
-
- SDValue Glue = Chain.getValue(1);
- Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R2, MVT::i32, Glue);
- Glue = Val0.getValue(1);
- Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R3, MVT::i32, Glue);
-
// Store exclusive double returns an i32 value which is the return status
// of the issued store.
std::vector<EVT> ResTys;
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- // place arguments in the right order
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ // Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
- Ops.push_back(Val0);
- Ops.push_back(Val1);
+ if (isThumb) {
+ Ops.push_back(Val0);
+ Ops.push_back(Val1);
+ } else
+ // arm_strexd uses GPRPair.
+ Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
- unsigned NewOpc = ARM::STREXD;
- if (Subtarget->isThumb() && Subtarget->hasThumb2())
- NewOpc = ARM::t2STREXD;
+ unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
Ops.size());
@@ -3291,7 +3402,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// Form a REG_SEQUENCE to force register allocation.
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
- SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
SmallVector<SDValue, 6> Ops;
Ops.push_back(RegSeq);
@@ -3321,6 +3432,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectAtomic64(N, ARM::ATOMSWAP6432);
case ARMISD::ATOMCMPXCHG64_DAG:
return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
+
+ case ARMISD::ATOMMIN64_DAG:
+ return SelectAtomic64(N, ARM::ATOMMIN6432);
+ case ARMISD::ATOMUMIN64_DAG:
+ return SelectAtomic64(N, ARM::ATOMUMIN6432);
+ case ARMISD::ATOMMAX64_DAG:
+ return SelectAtomic64(N, ARM::ATOMMAX6432);
+ case ARMISD::ATOMUMAX64_DAG:
+ return SelectAtomic64(N, ARM::ATOMUMAX6432);
}
return SelectCode(N);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e51315e..5b3e31f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -23,14 +23,8 @@
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -40,14 +34,20 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -122,6 +122,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT, Custom);
@@ -514,6 +515,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
@@ -538,6 +543,17 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+
+ // NEON does not have single instruction CTPOP for vectors with element
+ // types wider than 8-bits. However, custom lowering can leverage the
+ // v8i8/v16i8 vcnt instruction.
+ setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -635,9 +651,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- // These are expanded into libcalls.
- if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
- // v7M has a hardware divider
+ if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
+ !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
+ // These are expanded into libcalls if the CPU doesn't have a HW divider.
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
@@ -687,7 +703,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
// Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
@@ -813,9 +833,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
- maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
- maxStoresPerMemset = 16;
+ maxStoresPerMemset = 8;
maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
@@ -824,7 +847,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
benefitFromCodePlacementOpt = true;
// Prefer likely predicted branches to selects on out-of-order cores.
- predictableSelectIsExpensive = Subtarget->isCortexA9();
+ predictableSelectIsExpensive = Subtarget->isLikeA9();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -840,10 +863,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
-ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ARMTargetLowering::findRepresentativeClass(MVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
// Use DPR as representative register class for all floating point
@@ -1023,7 +1046,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
@@ -1593,19 +1616,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
+ bool HasMinSizeAttr = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
- else if (doesNotRet && isDirect && !isARMFunc &&
- Subtarget->hasRAS() && !Subtarget->isThumb1Only())
- // "mov lr, pc; b _foo" to avoid confusing the RSP
- CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
} else {
- if (!isDirect && !Subtarget->hasV5TOps()) {
+ if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
- } else if (doesNotRet && isDirect && Subtarget->hasRAS())
+ else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
+ // Emit regular call when code size is the priority
+ !HasMinSizeAttr)
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
@@ -1655,22 +1678,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void
-ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ARMTargetLowering::HandleByVal(
+ CCState *State, unsigned &size, unsigned Align) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
if ((!State->isFirstByValRegValid()) &&
(ARM::R0 <= reg) && (reg <= ARM::R3)) {
- State->setFirstByValReg(reg);
- // At a call site, a byval parameter that is split between
- // registers and memory needs its size truncated here. In a
- // function prologue, such byval parameters are reassembled in
- // memory, and are not truncated.
- if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+ if (Subtarget->isAAPCS_ABI() && Align > 4) {
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ reg = State->AllocateReg(GPRArgRegs, 4);
+ }
+ if (reg != 0) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
}
}
// Confiscate any remaining parameter registers to preclude their
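
The AAPCS branch above skips argument registers until an over-aligned byval starts on a register whose offset matches its alignment. A worked standalone check of the waste computation (registers modeled as indices r0..r4, an assumption for illustration only):

    #include <cassert>

    int main() {
      // An 8-byte-aligned byval passed in core registers must start in an
      // even register (r0 or r2), since r0 maps to offset 0, r2 to offset 8.
      const unsigned R4 = 4;             // one past the last argument reg
      unsigned Align = 8;                // byval alignment in bytes
      unsigned AlignInRegs = Align / 4;  // registers per alignment unit
      unsigned reg = 1;                  // first free register is r1
      unsigned Waste = (R4 - reg) % AlignInRegs;
      assert(Waste == 1);                // skip r1 ...
      reg += Waste;                      // (the real code re-allocates instead)
      assert(reg == 2);                  // ... so the byval starts in r2
      return 0;
    }
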
@@ -1803,6 +1835,14 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ // If Caller's vararg or byval argument has been split between registers and
+ // stack, do not perform tail call, since part of the argument is in caller's
+ // local frame.
+ const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
+ getInfo<ARMFunctionInfo>();
+ if (AFI_Caller->getVarArgsRegSaveSize())
+ return false;
+
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
@@ -1857,6 +1897,17 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return true;
}
+bool
+ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
+ isVarArg));
+}
+
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -2534,7 +2585,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
void
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
- unsigned ArgOffset) const {
+ const Value *OrigArg,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2561,7 +2615,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
getPointerTy());
SmallVector<SDValue, 4> MemOps;
- for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
+ for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
@@ -2572,7 +2626,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
@@ -2583,7 +2637,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+ AFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
}
SDValue
@@ -2606,14 +2661,16 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
-
+
SmallVector<SDValue, 16> ArgValues;
int lastInsIndex = -1;
-
SDValue ArgValue;
+ Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
-
+ std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
@@ -2707,14 +2764,20 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- unsigned VARegSize, VARegSaveSize;
- computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
- unsigned Bytes = Flags.getByValSize() - VARegSize;
- if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
- int FI = MFI->CreateFixedObject(Bytes,
- VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->getVarArgsFrameIndex()) {
+ VarArgStyleRegisters(CCInfo, DAG,
+ dl, Chain, CurOrigArg,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ true /*force mutable frames*/);
+ int VAFrameIndex = AFI->getVarArgsFrameIndex();
+ InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
+ } else {
+ int FI = MFI->CreateFixedObject(Flags.getByValSize(),
+ VA.getLocMemOffset(), false);
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ }
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
VA.getLocMemOffset(), true);
@@ -2732,7 +2795,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// varargs
if (isVarArg)
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
+ CCInfo.getNextStackOffset());
return Chain;
}
@@ -3501,6 +3565,114 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
+/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
+/// for each 16-bit element from operand, repeated. The basic idea is to
+/// leverage vcnt to get the 8-bit counts, gather and add the results.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
+/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
+/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
+/// [b0 b1 b2 b3 b4 b5 b6 b7]
+/// +[b1 b0 b3 b2 b5 b4 b7 b6]
+/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
+/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
+static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
+ SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
+ return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
+}
+
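
The vcnt/vrev16/vadd/vuzp sequence can be sanity-checked in scalar code: each byte's popcount is added to its neighbour's, so every 16-bit lane ends up with its full count in one byte. A minimal standalone model for a single lane:

    #include <cassert>
    #include <cstdint>

    static unsigned popcount8(uint8_t b) {
      unsigned n = 0;
      for (; b; b >>= 1) n += b & 1;
      return n;
    }

    int main() {
      uint16_t v = 0xBEEF;                    // one 16-bit lane
      uint8_t w0 = v & 0xFF, w1 = v >> 8;     // its 8-bit halves (the bitcast)
      uint8_t b0 = popcount8(w0), b1 = popcount8(w1);   // vcnt
      // vrev16 swaps b0/b1; the add leaves b0+b1 in both byte slots, and
      // vuzp keeps one copy per lane.
      assert(uint8_t(b0 + b1) == 13);         // popcount(0xBEEF) == 13
      return 0;
    }
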
+/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
+/// bit-count for each 16-bit element from the operand. We need slightly
+/// different sequencing for v4i16 and v8i16 to stay within NEON's available
+/// 64/128-bit registers.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
+/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
+/// v4i16:Extracted = [k0 k1 k2 k3 ]
+static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
+ DAG.getIntPtrConstant(0));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
+ BitCounts, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
+ }
+}
+
+/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
+/// bit-count for each 32-bit element from the operand. The idea here is
+/// to split the vector into 16-bit elements, leverage the 16-bit count
+/// routine, and then combine the results.
+///
+/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
+/// input = [v0 v1 ] (vi: 32-bit elements)
+/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
+/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
+/// vrev: N0 = [k1 k0 k3 k2 ]
+/// [k0 k1 k2 k3 ]
+/// N1 =+[k1 k0 k3 k2 ]
+/// [k0 k2 k1 k3 ]
+/// N2 =+[k1 k3 k0 k2 ]
+/// [k0 k2 k1 k3 ]
+/// Extended =+[k1 k3 k0 k2 ]
+/// [k0 k2 ]
+/// Extracted=+[k1 k3 ]
+///
+static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+
+ SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
+ SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
+ SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
+ SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
+
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
+ DAG.getIntPtrConstant(0));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
+ }
+}
+
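
The 32-bit path reuses the 16-bit counts: vrev32 swaps adjacent 16-bit counts, the add leaves k_even + k_odd in both halves, and vuzp plus a zero-extension yield one count per original 32-bit element. The same scalar model, one step wider:

    #include <cassert>
    #include <cstdint>

    static unsigned popcount16(uint16_t h) {
      unsigned n = 0;
      for (; h; h >>= 1) n += h & 1;
      return n;
    }

    int main() {
      uint32_t v = 0xDEADBEEF;                  // one 32-bit lane
      uint16_t w0 = v & 0xFFFF, w1 = v >> 16;   // 16-bit halves (the bitcast)
      unsigned k0 = popcount16(w0), k1 = popcount16(w1);   // Counts16
      // vrev32 + vadd: both halves now hold k0 + k1; vuzp keeps one.
      assert(k0 + k1 == 24);                    // popcount(0xDEADBEEF) == 24
      return 0;
    }
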
+static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+
+ assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
+ assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
+ VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
+
+ if (VT.getVectorElementType() == MVT::i32)
+ return lowerCTPOP32BitElements(N, DAG);
+ else
+ return lowerCTPOP16BitElements(N, DAG);
+}
+
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
@@ -3892,6 +4064,36 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+// Check whether a VEXT instruction can handle the shuffle mask when the
+// vector sources of the shuffle are the same.
+static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, just follow it
+ // back to index zero and keep going.
+ ++ExpectedElt;
+ if (ExpectedElt == NumElts)
+ ExpectedElt = 0;
+
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ return true;
+}
+
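
isSingletonVEXTMask accepts exactly the masks that rotate one source vector, e.g. <1,2,3,0> with Imm=1. A standalone restatement of the check with a few probes (UNDEF modeled as -1):

    #include <cassert>

    static bool isRotationMask(const int *M, unsigned NumElts, unsigned &Imm) {
      if (M[0] < 0) return false;                  // first index not UNDEF
      Imm = M[0];
      unsigned Expected = Imm;
      for (unsigned i = 1; i < NumElts; ++i) {
        if (++Expected == NumElts) Expected = 0;   // wrap around
        if (M[i] < 0) continue;                    // ignore UNDEF holes
        if ((unsigned)M[i] != Expected) return false;
      }
      return true;
    }

    int main() {
      unsigned Imm;
      int Rot[4]   = { 1, 2, 3, 0 };   // becomes: vext ..., #1
      int Holes[4] = { 2, -1, 0, 1 };  // UNDEF entries are tolerated
      int Bad[4]   = { 0, 2, 1, 3 };
      assert(isRotationMask(Rot, 4, Imm) && Imm == 1);
      assert(isRotationMask(Holes, 4, Imm) && Imm == 2);
      assert(!isRotationMask(Bad, 4, Imm));
      return 0;
    }
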
static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
@@ -4215,9 +4417,26 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// If we are VDUPing a value that comes directly from a vector, that will
// cause an unnecessary move to and from a GPR, where instead we could
// just use VDUPLANE.
- if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
- N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // We need to create a new undef vector to use for the VDUPLANE if the
+ // size of the vector from which we get the value is different than the
+ // size of the vector from which we get the value is different from the
+ // such that the register coalescer will remove unnecessary copies.
+ if (VT != Value->getOperand(0).getValueType()) {
+ ConstantSDNode *constIndex;
+ constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
+ assert(constIndex && "The index is not a constant!");
+ unsigned index = constIndex->getAPIntValue().getLimitedValue() %
+ VT.getVectorNumElements();
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
+ Value, DAG.getConstant(index, MVT::i32)),
+ DAG.getConstant(index, MVT::i32));
+ } else {
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
+ }
+ }
else
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
@@ -4634,6 +4853,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isVREVMask(ShuffleMask, VT, 16))
return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+ if (V2->getOpcode() == ISD::UNDEF &&
+ isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
// Check for Neon shuffles that modify both input vectors in place.
// If both results are used, i.e., if there are two shuffles with the same
// source operands and with masks corresponding to both results of one of
@@ -4833,16 +5058,76 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return false;
}
-/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
-/// load, or BUILD_VECTOR with extended elements, return the unextended value.
-static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
+/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
+/// We insert the required extension here to get the vector to fill a D register.
+static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
+ const EVT &OrigTy,
+ const EVT &ExtTy,
+ unsigned ExtOpcode) {
+ // The vector originally had a size of OrigTy. It was then extended to ExtTy.
+ // We expect the ExtTy to be 128 bits total. If the OrigTy is less than
+ // 64 bits, we need to insert a new extension so that it will be 64 bits.
+ assert(ExtTy.is128BitVector() && "Unexpected extension size");
+ if (OrigTy.getSizeInBits() >= 64)
+ return N;
+
+ // Must extend size to at least 64 bits to be used as an operand for VMULL.
+ MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
+ EVT NewVT;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Orig Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ NewVT = MVT::v2i32;
+ break;
+ case MVT::v4i8:
+ NewVT = MVT::v4i16;
+ break;
+ }
+ return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
+}
+
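
The switch above is just "double the element width until the vector fills 64 bits", because VMULL consumes full D registers. A standalone check of that invariant for the three cases handled:

    #include <cassert>

    int main() {
      // v2i8 -> v2i32, v2i16 -> v2i32, v4i8 -> v4i16: each widened vector
      // is exactly one 64-bit D register.
      struct Case { unsigned NumElts, EltBits, WideEltBits; };
      const Case Cases[3] = { { 2, 8, 32 }, { 2, 16, 32 }, { 4, 8, 16 } };
      for (unsigned i = 0; i != 3; ++i)
        assert(Cases[i].NumElts * Cases[i].WideEltBits == 64);
      return 0;
    }
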
+/// SkipLoadExtensionForVMULL - return a load of the original vector size that
+/// does not do any sign/zero extension. If the original vector is less
+/// than 64 bits, an appropriate extension will be added after the load to
+/// reach a total size of 64 bits. We have to add the extension separately
+/// because ARM does not have a sign/zero extending load for vectors.
+static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
+ SDValue NonExtendingLoad =
+ DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
+ unsigned ExtOp = 0;
+ switch (LD->getExtensionType()) {
+ default: llvm_unreachable("Unexpected LoadExtType");
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
+ }
+ MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
+ MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
+ return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
+ MemType, ExtType, ExtOp);
+}
+
+/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
+/// extending load, or BUILD_VECTOR with extended elements, return the
+/// unextended value. The unextended vector should be 64 bits so that it can
+/// be used as an operand to a VMULL instruction. If the original vector size
+/// before extension is less than 64 bits, we add an extension to resize
+/// the vector to 64 bits.
+static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
- return N->getOperand(0);
+ return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
+ N->getOperand(0)->getValueType(0),
+ N->getValueType(0),
+ N->getOpcode());
+
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
- return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
- LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment());
+ return SkipLoadExtensionForVMULL(LD, DAG);
+
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
if (N->getOpcode() == ISD::BITCAST) {
@@ -4897,7 +5182,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
EVT VT = Op.getValueType();
- assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ assert(VT.is128BitVector() && VT.isInteger() &&
+ "unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
@@ -4940,9 +5226,9 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Legalize to a VMULL instruction.
DebugLoc DL = Op.getDebugLoc();
SDValue Op0;
- SDValue Op1 = SkipExtension(N1, DAG);
+ SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
if (!isMLA) {
- Op0 = SkipExtension(N0, DAG);
+ Op0 = SkipExtensionForVMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
@@ -4957,8 +5243,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// vaddl q0, d4, d5
// vmovl q1, d6
// vmul q0, q0, q1
- SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
- SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+ SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
@@ -5244,6 +5530,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
@@ -5304,6 +5591,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_CMP_SWAP:
ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
return;
+ case ISD::ATOMIC_LOAD_MIN:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_UMIN:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_MAX:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_UMAX:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ return;
}
if (Res.getNode())
Results.push_back(Res);
@@ -5594,7 +5893,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// ldrex dest, ptr
// (sign extend dest, if required)
// cmp dest, incr
- // cmov.cond scratch2, dest, incr
+ // cmov.cond scratch2, incr, dest
// strex scratch, scratch2, ptr
// cmp scratch, #0
// bne- loopMBB
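
The operand-order fix matters because MOVCC keeps its first source when the condition holds, so the loop must select incr over dest for min/max to make progress. A portable behavioral model of the whole ldrex/cmp/movcc/strex loop (std::atomic stands in for the exclusive monitor; this mirrors the semantics, not the lowering):

    #include <atomic>
    #include <cassert>

    // Atomic signed min: returns the old value, stores min(old, incr).
    static int atomic_min(std::atomic<int> &mem, int incr) {
      int dest = mem.load();                           // ldrex dest, ptr
      for (;;) {
        int scratch2 = (incr < dest) ? incr : dest;    // cmp; cmov.cond
        if (mem.compare_exchange_weak(dest, scratch2)) // strex; bne- loop
          return dest;
      }
    }

    int main() {
      std::atomic<int> v(7);
      assert(atomic_min(v, 3) == 7 && v.load() == 3);
      assert(atomic_min(v, 5) == 3 && v.load() == 3);
      return 0;
    }
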
@@ -5617,7 +5916,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(oldval).addReg(incr));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
- .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+ .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
if (strOpc == ARM::t2STREX)
@@ -5643,7 +5942,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Op1, unsigned Op2,
- bool NeedsCarry, bool IsCmpxchg) const {
+ bool NeedsCarry, bool IsCmpxchg,
+ bool IsMinMax, ARMCC::CondCodes CC) const {
// This also handles ATOMIC_SWAP, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -5672,16 +5972,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
- if (IsCmpxchg) {
+ if (IsCmpxchg || IsMinMax)
contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ if (IsCmpxchg)
cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
- }
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
MF->insert(It, loopMBB);
- if (IsCmpxchg) {
- MF->insert(It, contBB);
- MF->insert(It, cont2BB);
- }
+ if (IsCmpxchg || IsMinMax) MF->insert(It, contBB);
+ if (IsCmpxchg) MF->insert(It, cont2BB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
@@ -5717,12 +6016,32 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// for ldrexd must be different.
BB = loopMBB;
// Load
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned GPRPair2;
+ if (IsMinMax) {
+ //We need an extra double register for doing min/max.
+ unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(undef)
+ .addReg(vallo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2)
+ .addReg(r1)
+ .addReg(valhi)
+ .addImm(ARM::gsub_1);
+ }
+
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(ARM::R2, RegState::Define)
- .addReg(ARM::R3, RegState::Define).addReg(ptr));
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
// Copy the GPRPair0 subregs into dest. (These copies will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
if (IsCmpxchg) {
// Add early exit
@@ -5741,24 +6060,67 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// Copy to physregs for strexd
unsigned setlo = MI->getOperand(5).getReg();
unsigned sethi = MI->getOperand(6).getReg();
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
+ unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(undef)
+ .addReg(setlo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
+ .addReg(r1)
+ .addReg(sethi)
+ .addImm(ARM::gsub_1);
} else if (Op1) {
// Perform binary operation
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
+ unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo)
.addReg(destlo).addReg(vallo))
.addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
- .addReg(desthi).addReg(valhi)).addReg(0);
+ unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
+ .addReg(desthi).addReg(valhi))
+ .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
+
+ unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(UndefPair)
+ .addReg(tmpRegLo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
+ .addReg(r1)
+ .addReg(tmpRegHi)
+ .addImm(ARM::gsub_1);
} else {
// Copy to physregs for strexd
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
+ unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(UndefPair)
+ .addReg(vallo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
+ .addReg(r1)
+ .addReg(valhi)
+ .addImm(ARM::gsub_1);
+ }
+ unsigned GPRPairStore = GPRPair1;
+ if (IsMinMax) {
+ // Compare and branch to exit block.
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(contBB);
+ BB = contBB;
+ GPRPairStore = GPRPair2;
}
// Store
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
+ .addReg(GPRPairStore).addReg(ptr));
// Cmp+jump
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(storesuccess).addImm(0));
@@ -5987,12 +6349,15 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineMemOperand::MOLoad |
MachineMemOperand::MOVolatile, 4, 4);
- if (AFI->isThumb1OnlyFunction())
- BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup));
- else if (!Subtarget->hasVFP2())
- BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp));
- else
- BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
+
+ const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+
+ // Add a register mask with no preserved registers. This results in all
+ // registers being marked as clobbered.
+ MIB.addRegMask(RI.getNoPreservedMask());
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
@@ -6064,9 +6429,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getTargetData()->getTypeAllocSize(C->getType());
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
@@ -6153,9 +6518,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getTargetData()->getTypeAllocSize(C->getType());
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
@@ -6211,8 +6576,6 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
- const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
- const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
@@ -6249,7 +6612,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DefRegs[OI->getReg()] = true;
}
- MachineInstrBuilder MIB(&*II);
+ MachineInstrBuilder MIB(*MF, &*II);
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
unsigned Reg = SavedRegs[i];
@@ -6326,7 +6689,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) &&
+ if (!MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16) {
ldrOpc = ARM::VLD1q32wb_fixed;
@@ -6411,7 +6776,8 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
} else {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
+ .addReg(srcOut, RegState::Define).addReg(srcIn)
+ .addReg(0).addImm(1));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
@@ -6474,9 +6840,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getTargetData()->getTypeAllocSize(C->getType());
+ Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
@@ -6753,6 +7119,26 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ false, /*IsCmpxchg*/true);
+ case ARM::ATOMMIN6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::LE);
+ case ARM::ATOMMAX6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::GE);
+ case ARM::ATOMUMIN6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::LS);
+ case ARM::ATOMUMAX6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::HS);
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -9024,20 +9410,36 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
- if (!Subtarget->allowsUnalignedMem())
- return false;
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
+ bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
switch (VT.getSimpleVT().SimpleTy) {
default:
return false;
case MVT::i8:
case MVT::i16:
- case MVT::i32:
- return true;
+ case MVT::i32: {
+ // Unaligned access can use (for example) LDRB, LDRH, LDR
+ if (AllowsUnaligned) {
+ if (Fast)
+ *Fast = Subtarget->hasV7Ops();
+ return true;
+ }
+ return false;
+ }
case MVT::f64:
- return Subtarget->hasNEON();
- // FIXME: VLD1 etc with standard alignment is legal.
+ case MVT::v2f64: {
+ // For any little-endian targets with NEON, we can support unaligned ld/st
+ // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
+ // A big-endian target may also explicitly support unaligned accesses
+ if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+ return false;
+ }
}
}
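
The new out-parameter separates "legal" from "fast"; a usage sketch, assuming a TargetLowering reference named TLI (this is the pattern getOptimalMemOpType below follows):

    bool Fast = false;
    if (TLI.allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast) {
      // Unaligned 128-bit NEON ld/st is legal *and* fast here, so wide
      // vld1.8/vst1.8 copies can beat aligned word-sized ones.
    }
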
@@ -9049,33 +9451,59 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
- if (IsZeroVal &&
- !F->hasFnAttr(Attribute::NoImplicitFloat) &&
- Subtarget->hasNEON()) {
- if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
- return MVT::v4i32;
- } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
- return MVT::v2i32;
+ if ((!IsMemset || ZeroMemset) &&
+ Subtarget->hasNEON() &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) {
+ bool Fast;
+ if (Size >= 16 &&
+ (memOpAlign(SrcAlign, DstAlign, 16) ||
+ (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
+ return MVT::v2f64;
+ } else if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
+ return MVT::f64;
}
}
// Lowering to i32/i16 if the size permits.
- if (Size >= 4) {
+ if (Size >= 4)
return MVT::i32;
- } else if (Size >= 2) {
+ else if (Size >= 2)
return MVT::i16;
- }
// Let the target-independent logic figure it out.
return MVT::Other;
}
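
The selection ladder above, with the legal-and-fast unaligned cases folded into the alignment flags, reduces to a few comparisons. A standalone model (flag names are illustrative only):

    #include <cassert>
    #include <cstdint>

    enum Ty { V2F64, F64, I32, I16, OTHER };

    // Align16/Align8 stand for "aligned enough, or unaligned-but-fast".
    static Ty pickMemOpType(uint64_t Size, bool Align16, bool Align8,
                            bool NeonOK) {
      if (NeonOK && Size >= 16 && Align16) return V2F64;
      if (NeonOK && Size >= 8  && Align8)  return F64;
      if (Size >= 4) return I32;
      if (Size >= 2) return I16;
      return OTHER;
    }

    int main() {
      assert(pickMemOpType(32, true,  true,  true)  == V2F64);
      assert(pickMemOpType(8,  false, true,  true)  == F64);
      assert(pickMemOpType(8,  false, false, true)  == I32);
      assert(pickMemOpType(1,  false, false, false) == OTHER);
      return 0;
    }
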
+bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ EVT VT1 = Val.getValueType();
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ // 8-bit and 16-bit loads implicitly zero-extend to 32 bits.
+ return true;
+ }
+
+ return false;
+}
+
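
The rationale: ARM's ldrb/ldrh write the full 32-bit register with the upper bits cleared, so the zext the IR asks for is already done by the load. A short illustration (compiles anywhere; on ARM the body is a single ldrb):

    #include <cstdint>

    static uint32_t load_and_zext(const uint8_t *p) {
      return *p;   // one ldrb on ARM; no extra uxtb/and is emitted
    }

    int main() {
      uint8_t b = 0xFF;
      return load_and_zext(&b) == 255 ? 0 : 1;
    }
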
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
@@ -9850,7 +10278,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::arm_neon_vld4lane: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
@@ -9875,7 +10303,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+ NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
@@ -9915,3 +10343,4 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}
+
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 2b8f382..9ee17f0 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -17,11 +17,11 @@
#include "ARM.h"
#include "ARMSubtarget.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <vector>
namespace llvm {
@@ -232,7 +232,11 @@ namespace llvm {
ATOMAND64_DAG,
ATOMNAND64_DAG,
ATOMSWAP64_DAG,
- ATOMCMPXCHG64_DAG
+ ATOMCMPXCHG64_DAG,
+ ATOMMIN64_DAG,
+ ATOMUMIN64_DAG,
+ ATOMMAX64_DAG,
+ ATOMUMAX64_DAG
};
}
@@ -248,7 +252,7 @@ namespace llvm {
public:
explicit ARMTargetLowering(TargetMachine &TM);
- virtual unsigned getJumpTableEncoding(void) const;
+ virtual unsigned getJumpTableEncoding() const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -281,15 +285,19 @@ namespace llvm {
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses. of the specified type.
- virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+ /// unaligned memory accesses of the specified type. Returns whether it
+ /// is "fast" by reference in the second argument.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
virtual EVT getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const;
+ using TargetLowering::isZExtFree;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
@@ -358,7 +366,7 @@ namespace llvm {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- virtual const TargetRegisterClass *getRegClassFor(EVT VT) const;
+ virtual const TargetRegisterClass *getRegClassFor(MVT VT) const;
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
@@ -384,7 +392,7 @@ namespace llvm {
unsigned Intrinsic) const;
protected:
std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(EVT VT) const;
+ findRepresentativeClass(MVT VT) const;
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
@@ -466,7 +474,11 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain, unsigned ArgOffset)
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable = false)
const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
@@ -477,7 +489,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
/// HandleByVal - Target-specific cleanup for ByVal support.
- virtual void HandleByVal(CCState *, unsigned &) const;
+ virtual void HandleByVal(CCState *, unsigned &, unsigned) const;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
@@ -491,6 +503,12 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
+
+ virtual bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -522,7 +540,9 @@ namespace llvm {
unsigned Op1,
unsigned Op2,
bool NeedsCarry = false,
- bool IsCmpxchg = false) const;
+ bool IsCmpxchg = false,
+ bool IsMinMax = false,
+ ARMCC::CondCodes CC = ARMCC::AL) const;
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index c8966fb..67a6820 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -846,6 +846,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
let Inst{3-0} = Rm;
}
+// Division instructions.
+class ADivA1I<bits<3> opcod, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-23} = 0b01110;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = Rm;
+ let Inst{7-4} = 0b0001;
+ let Inst{3-0} = Rn;
+}
+
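As a sanity check on this field layout, here is a standalone re-derivation of the encoding; the real encoder is TableGen-generated, and encodeDiv is a hypothetical helper. Assembling sdiv r0, r1, r2 with cond=AL should give 0xE710F211.

#include <cstdint>

// Hypothetical helper re-deriving the ADivA1I bit layout above.
constexpr uint32_t encodeDiv(uint32_t Opcod, uint32_t Rd, uint32_t Rn,
                             uint32_t Rm) {
  return (0xEu << 28)  // cond = AL
       | (0x0Eu << 23) // Inst{27-23} = 0b01110
       | (Opcod << 20) // Inst{22-20}
       | (Rd << 16)    // Inst{19-16}
       | (0xFu << 12)  // Inst{15-12} = 0b1111
       | (Rm << 8)     // Inst{11-8}
       | (0x1u << 4)   // Inst{7-4} = 0b0001
       | Rn;           // Inst{3-0}
}
static_assert(encodeDiv(1, 0, 1, 2) == 0xE710F211,
              "sdiv r0, r1, r2 in ARM mode");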
// PKH instructions
def PKHLSLAsmOperand : ImmAsmOperand {
let Name = "PKHLSLImm";
@@ -893,6 +910,10 @@ class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> {
class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV5TE];
}
+// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps.
+class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps];
+}
class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV6];
}
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 31b0c41..80f0ec7 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -13,13 +13,17 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
+#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
@@ -84,3 +88,61 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
+
+namespace {
+ /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for ARM ELF.
+ struct ARMCGBR : public MachineFunctionPass {
+ static char ID;
+ ARMCGBR() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->getGlobalBaseReg() == 0)
+ return false;
+
+ const ARMTargetMachine *TM =
+ static_cast<const ARMTargetMachine *>(&MF.getTarget());
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ LLVMContext* Context = &MF.getFunction()->getContext();
+ GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
+ GlobalValue::ExternalLinkage, 0,
+ "_GLOBAL_OFFSET_TABLE_");
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id);
+ unsigned Align = TM->getDataLayout()->getPrefTypeAlignment(GV->getType());
+ unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
+
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
+ ARM::t2LDRpci : ARM::LDRcp;
+ const TargetInstrInfo &TII = *TM->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
+ TII.get(Opc), GlobalBaseReg)
+ .addConstantPoolIndex(Idx);
+ if (Opc == ARM::LDRcp)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char ARMCGBR::ID = 0;
+FunctionPass*
+llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
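
A hedged sketch of how the new pass is meant to be scheduled; the real hookup lives in ARMTargetMachine.cpp (not part of this hunk), and the exact guard shown here is an assumption:

// Sketch only: wiring createARMGlobalBaseRegPass() into ARMPassConfig.
// The condition is illustrative; consult ARMTargetMachine.cpp for the
// committed logic.
bool ARMPassConfig::addInstSelector() {
  addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
  if (getARMSubtarget().isTargetELF()) // only ELF PIC needs the GOT base
    addPass(createARMGlobalBaseRegPass());
  return false;
}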
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index e23989e..12712c0 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -207,6 +207,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
AssemblerPredicate<"FeatureHWDiv", "divide">;
+def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
+                     AssemblerPredicate<"FeatureHWDivARM", "divide in ARM mode">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
@@ -242,6 +244,7 @@ def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
@@ -252,6 +255,20 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
+// VGETLNi32 is microcoded on Swift - prefer VMOV.
+def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
+
+// VDUP.32 is microcoded on Swift - prefer VMOV.
+def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
+
+// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
+// this allows more effective execution domain optimization. See
+// setExecutionDomain().
+def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
+
def IsLE : Predicate<"TLI.isLittleEndian()">;
def IsBE : Predicate<"TLI.isBigEndian()">;
@@ -266,15 +283,13 @@ class RegConstraint<string C> {
// ARM specific transformation functions and pattern fragments.
//
-// imm_neg_XFORM - Return a imm value packed into the format described for
-// imm_neg defs below.
+// imm_neg_XFORM - Return the negation of an i32 immediate value.
def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
}]>;
-// so_imm_not_XFORM - Return a so_imm value packed into the format described for
-// so_imm_not def below.
-def so_imm_not_XFORM : SDNodeXForm<imm, [{
+// imm_not_XFORM - Return the complement of an i32 immediate value.
+def imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
@@ -297,7 +312,7 @@ def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_not_XFORM> {
+ }], imm_not_XFORM> {
let ParserMatchClass = so_imm_not_asmoperand;
}
@@ -402,6 +417,8 @@ def reglist : Operand<i32> {
let DecoderMethod = "DecodeRegListOperand";
}
+def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">;
+
def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; }
def dpr_reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
@@ -1607,6 +1624,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
GPR:$set1, GPR:$set2),
NoItinerary, []>;
+def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
}
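
These pseudos carry the semantics of a 64-bit atomic min/max read-modify-write; the backend later expands them to an ldrexd/strexd retry loop. A portable C++ sketch of the max case, showing the semantics only:

#include <atomic>
#include <cstdint>

// Semantic sketch of ATOMMAX6432: store max(old, V), return the old value.
int64_t atomicMax64(std::atomic<int64_t> &A, int64_t V) {
  int64_t Old = A.load(std::memory_order_relaxed);
  while (Old < V &&
         !A.compare_exchange_weak(Old, V, std::memory_order_seq_cst)) {
    // compare_exchange_weak reloads Old on failure; retry until the
    // stored value is already >= V or our store of V succeeds.
  }
  return Old;
}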
def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
@@ -3092,15 +3121,19 @@ def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
(SUBSri GPR:$src, so_imm_neg:$imm)>;
def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm),
- (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+ (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
- (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+ (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
(SBCri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR),
+ (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>;
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function
@@ -3444,13 +3477,13 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
4, IIC_iMUL32,
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
(MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6, UseMulOps]>;
}
def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6, UseMulOps]> {
bits<4> Ra;
let Inst{15-12} = Ra;
}
@@ -3466,7 +3499,7 @@ def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
- Requires<[IsARM, HasV6T2]> {
+ Requires<[IsARM, HasV6T2, UseMulOps]> {
bits<4> Rd;
bits<4> Rm;
bits<4> Rn;
@@ -3572,7 +3605,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6, UseMulOps]>;
def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
@@ -3582,7 +3615,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6, UseMulOps]>;
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
@@ -3636,7 +3669,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd, (add GPR:$Ra,
(opnode (sext_inreg GPRnopc:$Rn, i16),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3644,7 +3677,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3652,7 +3685,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3660,7 +3693,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3668,7 +3701,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3676,7 +3709,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
}
}
@@ -3779,6 +3812,19 @@ defm SMUA : AI_sdml<0, "smua">;
defm SMUS : AI_sdml<1, "smus">;
//===----------------------------------------------------------------------===//
+// Division Instructions (ARMv7-A with virtualization extension)
+//
+def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
//
@@ -4197,8 +4243,8 @@ def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "ldrex", "\t$Rt, $addr", []>;
let hasExtraDefRegAllocReq = 1 in
-def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr),
- NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> {
+def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegLoad";
}
}
@@ -4212,8 +4258,8 @@ def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
let hasExtraSrcRegAllocReq = 1 in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
- (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
- NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
}
@@ -4711,21 +4757,13 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
Requires<[IsARM, IsIOS]>;
}
-// eh.sjlj.dispatchsetup pseudo-instructions.
-// These pseudos are used for both ARM and Thumb2. Any differences are
-// handled when the pseudo is expanded (which happens before any passes
-// that need the instruction size).
-let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
- isBarrier = 1 in
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for both ARM and Thumb. Any differences are handled when
+// the pseudo is expanded (which happens before any passes that need the
+// instruction size).
+let isBarrier = 1 in
def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
-let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- isBarrier = 1 in
-def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>;
-
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -4829,32 +4867,32 @@ def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
(SMULWB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
(sra (shl GPR:$b, (i32 16)), (i32 16)))),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
(sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)),
(sra (shl GPR:$b, (i32 16)), (i32 16)))),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
(i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8158a11..697a8d2 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -398,6 +398,20 @@ def VecListFourQWordIndexed : Operand<i32> {
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
+def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 8;
+}]>;
+def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 4;
+}]>;
+def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 4;
+}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
@@ -2273,6 +2287,25 @@ def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
(VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
+// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
+// load / store if it's legal.
+def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
+ (VLD1q64 addrmode6:$addr)>;
+def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q64 addrmode6:$addr, QPR:$value)>;
+def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
+ (VLD1q32 addrmode6:$addr)>;
+def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q32 addrmode6:$addr, QPR:$value)>;
+def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
+ (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
+ (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+
//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//
@@ -4455,10 +4488,36 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set DPR:$Vd,
(v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
+ (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
+ (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
+ (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
+ (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
+ (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
- (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
+
+def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON]>;
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
@@ -4467,9 +4526,35 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
[(set QPR:$Vd,
(v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
+ (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
+ (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
+ (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
+ (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
+ (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
- (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
+def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
@@ -4792,12 +4877,15 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
int_arm_neon_vabs>;
-def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs", "f32",
- v2f32, v2f32, int_arm_neon_vabs>;
-def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAQ, "vabs", "f32",
- v4f32, v4f32, int_arm_neon_vabs>;
+def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v2f32, v2f32, fabs>;
+def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v4f32, v4f32, fabs>;
+
+def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
+def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
@@ -4983,7 +5071,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
(outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
- imm:$lane))]> {
+ imm:$lane))]>,
+ Requires<[HasNEON, HasFastVGETLNi32]> {
let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
@@ -5006,7 +5095,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
- (SubReg_i32_lane imm:$lane))>;
+ (SubReg_i32_lane imm:$lane))>,
+ Requires<[HasNEON, HasFastVGETLNi32]>;
+def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
(EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
@@ -5045,23 +5143,25 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{0};
}
-
-def VSETLNi8Q : PseudoNeonI<(outs QPR:$V),
- (ins QPR:$src1, GPR:$R, VectorIndex8:$lane),
- IIC_VMOVISL, "",
- [(set QPR:$V, (vector_insert (v16i8 QPR:$src1),
- GPR:$R, imm:$lane))]>;
-def VSETLNi16Q : PseudoNeonI<(outs QPR:$V),
- (ins QPR:$src1, GPR:$R, VectorIndex16:$lane),
- IIC_VMOVISL, "",
- [(set QPR:$V, (vector_insert (v8i16 QPR:$src1),
- GPR:$R, imm:$lane))]>;
}
-
+def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
+ (v16i8 (INSERT_SUBREG QPR:$src1,
+ (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i8_reg imm:$lane))),
+ GPR:$src2, (SubReg_i8_lane imm:$lane))),
+ (DSubReg_i8_reg imm:$lane)))>;
+def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
+ (v8i16 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ GPR:$src2, (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
- (v4i32 (INSERT_SUBREG QPR:$src1,
- GPR:$src2,
- (SSubReg_f32_reg imm:$lane)))>;
+ (v4i32 (INSERT_SUBREG QPR:$src1,
+ (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i32_reg imm:$lane))),
+ GPR:$src2, (SubReg_i32_lane imm:$lane))),
+ (DSubReg_i32_reg imm:$lane)))>;
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
(INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
@@ -5115,14 +5215,23 @@ class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
-def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
+ Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
-def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
+// NEONvdup patterns for uarchs with fast VDUP.32.
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
+ Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
+def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+
// VDUP : Vector Duplicate Lane (from scalar to all elements)
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
@@ -5559,6 +5668,11 @@ def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;
+// Prefer VMOVDRR for i32 -> f32 bitcasts; it can write all DPR registers.
+def : Pat<(f32 (bitconvert GPR:$a)),
+ (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
+ Requires<[HasNEON, DontUseVMOVSR]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index e171f8b..ae7a5c0 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -223,6 +223,7 @@ def t_addrmode_sp : Operand<i32>,
def t_addrmode_pc : Operand<i32> {
let EncoderMethod = "getAddrModePCOpValue";
let DecoderMethod = "DecodeThumbAddrModePC";
+ let PrintMethod = "printThumbLdrLabelOperand";
}
//===----------------------------------------------------------------------===//
@@ -1246,10 +1247,6 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
Requires<[IsThumb, IsIOS]>;
-let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
- isBarrier = 1 in
-def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
-
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index f1a6cce..cf8b302 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -130,8 +130,9 @@ def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
let ParserMatchClass = imm0_4095_neg_asmoperand;
}
-def imm0_255_neg : PatLeaf<(i32 imm), [{
- return (uint32_t)(-N->getZExtValue()) < 255;
+def imm1_255_neg : PatLeaf<(i32 imm), [{
+ uint32_t Val = -N->getZExtValue();
+ return (Val > 0 && Val < 255);
}], imm_neg_XFORM>;
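A standalone mirror of the fixed predicate (toy code, not the TableGen-generated matcher) makes the boundary change visible: the old form also accepted 0, since (uint32_t)-0 == 0 < 255.

#include <cstdint>

// Toy mirror of imm1_255_neg: accept N whose negation is in 1..254.
constexpr bool imm1_255_negMatches(uint32_t N) {
  uint32_t Val = 0u - N;
  return Val > 0 && Val < 255;
}
static_assert(!imm1_255_negMatches(0), "0 no longer matches");
static_assert(imm1_255_negMatches(0xFFFFFFFFu), "-1 negates to 1: match");
static_assert(!imm1_255_negMatches(0xFFFFFF01u), "negation 255 is excluded");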
def imm0_255_not : PatLeaf<(i32 imm), [{
@@ -159,7 +160,7 @@ def t2addrmode_imm12 : Operand<i32>,
// t2ldrlabel := imm12
def t2ldrlabel : Operand<i32> {
let EncoderMethod = "getAddrModeImm12OpValue";
- let PrintMethod = "printT2LdrLabelOperand";
+ let PrintMethod = "printThumbLdrLabelOperand";
}
def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";}
@@ -1928,8 +1929,8 @@ defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
// The AddedComplexity prefers the first variant over the others since
// it can be shrunk to a 16-bit wide encoding, while the others cannot.
let AddedComplexity = 1 in
-def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
- (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm1_255_neg:$imm),
+ (t2SUBri GPR:$src, imm1_255_neg:$imm)>;
def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
(t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
@@ -1938,8 +1939,8 @@ def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
(t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
let AddedComplexity = 1 in
-def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
- (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, imm1_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm1_255_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
(t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm),
@@ -1953,7 +1954,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
- (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+ (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>;
// Select Bytes -- for disassembly only
@@ -2314,13 +2315,15 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn",
/// changed to modify CPSR.
multiclass T2I_un_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+ PatFrag opnode,
+ bit Cheap = 0, bit ReMat = 0, bit MoveImm = 0> {
// shifted imm
def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
opc, "\t$Rd, $imm",
[(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
+ let isMoveImm = MoveImm;
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -2354,7 +2357,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn",
IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
- UnOpFrag<(not node:$Src)>, 1, 1>;
+ UnOpFrag<(not node:$Src)>, 1, 1, 1>;
let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
@@ -2396,7 +2399,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
def t2MLA: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"mla", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2406,7 +2410,8 @@ def t2MLA: T2FourReg<
def t2MLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"mls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2475,7 +2480,7 @@ def t2SMMLA : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2496,7 +2501,7 @@ def t2SMMLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmls", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2601,7 +2606,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
[(set rGPR:$Rd, (add rGPR:$Ra,
(opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2614,7 +2619,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2627,7 +2632,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2640,7 +2645,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2653,7 +2658,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2666,7 +2671,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2760,7 +2765,7 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
// Division Instructions.
// Signed and unsigned division on v7-M
//
-def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
Requires<[HasDivide, IsThumb2]> {
@@ -2771,7 +2776,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
Requires<[HasDivide, IsThumb2]> {
@@ -3243,11 +3248,11 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let Inst{15-14} = 0b10;
let Inst{12} = 1;
- bits<20> target;
+ bits<24> target;
let Inst{26} = target{19};
let Inst{11} = target{18};
let Inst{13} = target{17};
- let Inst{21-16} = target{16-11};
+ let Inst{25-16} = target{20-11};
let Inst{10-0} = target{10-0};
let DecoderMethod = "DecodeT2BInstruction";
}
@@ -3329,20 +3334,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
Requires<[IsThumb2, IsIOS]>;
}
-let isCall = 1, Defs = [LR], Uses = [SP] in {
- // mov lr, pc; b if callee is marked noreturn to avoid confusing the
- // return stack predictor.
- def t2BMOVPCB_CALL : tPseudoInst<(outs),
- (ins t_bltarget:$func),
- 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
- Requires<[IsThumb]>;
-}
-
-// Direct calls
-def : T2Pat<(ARMcall_nolink texternalsym:$func),
- (t2BMOVPCB_CALL texternalsym:$func)>,
- Requires<[IsThumb]>;
-
// IT block
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 7d6692f..b5a896c 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -523,10 +523,12 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
let D = VFPNeonDomain;
}
+// Bitcast i32 -> f32. NEON prefers to use VMOVDRR.
def VMOVSR : AVConv4I<0b11100000, 0b1010,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
- [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
+ Requires<[HasVFP2, UseVMOVSR]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 2dc49a9..a5642d6 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -17,13 +17,12 @@
#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 7928184..23a6a9b 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -15,12 +15,12 @@
#define ARMJITINFO_H
#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetJITInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
namespace llvm {
class ARMTargetMachine;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 897ceb6..a1c21ee 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -18,8 +18,12 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -27,19 +31,15 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
@@ -1408,7 +1408,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
PrevMI->setDesc(TII->get(NewOpc));
MO.setReg(ARM::PC);
- PrevMI->copyImplicitOps(&*MBBI);
+ PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
MBB.erase(MBBI);
return true;
}
@@ -1448,7 +1448,7 @@ namespace {
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
- const TargetData *TD;
+ const DataLayout *TD;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
@@ -1478,7 +1478,7 @@ namespace {
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getTarget().getTargetData();
+ TD = Fn.getTarget().getDataLayout();
TII = Fn.getTarget().getInstrInfo();
TRI = Fn.getTarget().getRegisterInfo();
STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index e2ac9a4..b641483 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -15,8 +15,8 @@
#include "ARM.h"
#include "ARMAsmPrinter.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index f1c8fc8..88d96c0 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -15,10 +15,10 @@
#define ARMMACHINEFUNCTIONINFO_H
#include "ARMSubtarget.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
@@ -108,6 +108,11 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// pass.
DenseMap<unsigned, unsigned> CPEClones;
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+  /// use as the global base register. This is used in some PIC
+  /// relocation models.
+ unsigned GlobalBaseReg;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -119,7 +124,7 @@ public:
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -130,7 +135,7 @@ public:
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -249,6 +254,9 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
assert(0 && "Duplicate entries!");
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 6f974fd..b0f576b 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -49,6 +49,9 @@ def ssub_0 : SubRegIndex;
def ssub_1 : SubRegIndex;
def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>;
def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>;
+
+def gsub_0 : SubRegIndex;
+def gsub_1 : SubRegIndex;
// Let TableGen synthesize the remaining 12 ssub_* indices.
// We don't need to name them.
}
@@ -247,11 +250,16 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
}
// Scalar single precision floating point register class..
-def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
+// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to
+// avoid partial-write dependencies on D registers (S registers are
+// renamed as portions of D registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate
+ (sequence "S%u", 0, 31), 2),
+ (sequence "S%u", 0, 31))>;
// Subset of SPR which can be used as a source of NEON scalars for 16-bit
// operations
-def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
// Scalar double precision floating point / generic 64-bit vector register
// class.
@@ -308,6 +316,17 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
let AltOrderSelect = [{ return 1; }];
}
+// Pseudo-registers representing even-odd pairs of GPRs, from (R0, R1) up to
+// (R12, SP). These are needed by instructions (e.g. ldrexd/strexd) that
+// require even-odd GPR pairs.
+def Tuples2R : RegisterTuples<[gsub_0, gsub_1],
+ [(add R0, R2, R4, R6, R8, R10, R12),
+ (add R1, R3, R5, R7, R9, R11, SP)]>;
+
+// Register class representing a pair of even-odd GPRs.
+def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> {
+ let Size = 64; // 2 x 32 bits, we have no predefined type of that size.
+}
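
A toy check of the pairing rule this class models (standalone code; in LLVM the constraint is enforced by allocating from Tuples2R, not by a predicate like this hypothetical one):

// Toy model: ldrexd/strexd need an even first register and its
// immediate successor; register numbers 0..13 stand for R0..SP.
constexpr bool isValidGPRPair(unsigned Rt, unsigned Rt2) {
  return (Rt % 2 == 0) && Rt2 == Rt + 1 && Rt2 <= 13;
}
static_assert(isValidGPRPair(0, 1) && isValidGPRPair(12, 13),
              "R0_R1 and R12_SP are legal pairs");
static_assert(!isValidGPRPair(1, 2), "odd first register is rejected");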
+
// Pseudo-registers representing 3 consecutive D registers.
def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
[(shl DPR, 0),
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 81d2fa3..02196d0 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -55,6 +55,7 @@ def IIC_iMUL32 : InstrItinClass;
def IIC_iMAC32 : InstrItinClass;
def IIC_iMUL64 : InstrItinClass;
def IIC_iMAC64 : InstrItinClass;
+def IIC_iDIV : InstrItinClass;
def IIC_iLoad_i : InstrItinClass;
def IIC_iLoad_r : InstrItinClass;
def IIC_iLoad_si : InstrItinClass;
@@ -261,3 +262,4 @@ def IIC_VTBX4 : InstrItinClass;
include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
+include "ARMScheduleSwift.td"
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 7bc590f..404634f 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1876,8 +1876,9 @@ def CortexA9Itineraries : ProcessorItineraries<
]>;
// ===---------------------------------------------------------------------===//
-// This following definitions describe the simple machine model which
-// will replace itineraries.
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler and will eventually replace itineraries.
+
// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel {
@@ -1891,5 +1892,595 @@ def CortexA9Model : SchedMachineModel {
let Itineraries = CortexA9Itineraries;
}
-// TODO: Add Cortex-A9 processor and scheduler resources.
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+def A9UnitALU : ProcResource<2>;
+def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
+def A9UnitAGU : ProcResource<1>;
+def A9UnitLS : ProcResource<1>;
+def A9UnitFP : ProcResource<1> { let Buffered = 0; }
+def A9UnitB : ProcResource<1>;
+
+//===----------------------------------------------------------------------===//
+// Define scheduler read/write types with their resources and latency on A9.
+
+// Consume an issue slot, but no processor resources. This is useful when all
+// other writes associated with the operand have NumMicroOps = 0.
+def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
+
+// Write an integer register.
+def A9WriteI : SchedWriteRes<[A9UnitALU]>;
+// Write an integer shifted-by register
+def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+
+// Basic ALU.
+def A9WriteA : SchedWriteRes<[A9UnitALU]>;
+// ALU with operand shifted by immediate.
+def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+// ALU with operand shifted by register.
+def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
+
+// Multiplication
+def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
+def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
+ let NumMicroOps = 0; }
+def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
+def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
+ let NumMicroOps = 0; }
+
+// Floating-point
+// Only one FP or AGU instruction may issue per cycle. We model this
+// by having FP instructions consume the AGU resource.
+def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
+def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
+def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
+def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
+def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
+def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
+def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
+def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
+def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
+
+// NEON has an odd mix of latencies. Simply name the write types by latency.
+def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
+def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
+def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
+def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
+def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
+def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
+def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
+def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
+
+// Reserve A9UnitFP for 2 consecutive cycles.
+def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 7;
+ let ResourceCycles = [2];
+}
+def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+// Branches don't have a def operand but still consume resources.
+def A9WriteB : SchedWriteRes<[A9UnitB]>;
+
+// Address generation.
+def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
+
+// Load Integer.
+def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
+// Load the upper 32-bits using the same micro-op.
+def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
+ let NumMicroOps = 0; }
+// Offset shifted by register.
+def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
+// Load (and zero extend) a byte.
+def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
+def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
+
+// Load or Store Float, aligned.
+def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
+
+// Store Integer.
+def A9WriteS : SchedWriteRes<[A9UnitLS]>;
+
+//===----------------------------------------------------------------------===//
+// Define resources dynamically for load multiple variants.
+
+// Define helpers for extra latency without consuming resources.
+def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
+foreach NumCycles = 2-8 in {
+def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
+} // foreach NumCycles
+
+// Define TII for use in SchedVariant Predicates.
+def : PredicateProlog<[{
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
+// Define address generation sequences and predicates for 8 flavors of LDMs.
+foreach NumAddr = 1-8 in {
+
+// Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
+// latency for instructions that generate multiple loads or stores.
+def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
+
+// Define a predicate to select the LDM based on number of memory addresses.
+def A9LMAdr#NumAddr#Pred :
+ SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+
+} // foreach NumAddr
+
+// Fall-back for unknown LDMs.
+def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;
+
+// LDM/VLDM/VLDn address generation latency & resources.
+// Dynamically select the A9WriteAdrN sequence using a predicate.
+def A9WriteLMAdr : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
+ // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
+ SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
+
+// Define LDM Resources.
+// These take no issue resource, so they can be combined with other
+// writes like WriteB.
+// A9WriteLMLo takes a single LS resource and 2 cycles.
+def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
+ let NumMicroOps = 0; }
+// Assuming aligned access, the upper half of each pair is free with
+// the same latency.
+def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
+ let NumMicroOps = 0; }
+// Each A9WriteL#N variant adds N cycles of latency without consuming
+// additional resources.
+foreach NumAddr = 1-8 in {
+def A9WriteL#NumAddr : WriteSequence<
+ [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+def A9WriteL#NumAddr#Hi : WriteSequence<
+ [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+}
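+// For example, A9WriteL2 is [A9WriteLMLo, A9WriteCycle2]: it occupies
+// the LS unit with zero micro-ops and has a total latency of 2 + 2 = 4.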
+
+//===----------------------------------------------------------------------===//
+// LDM: Load multiple into 32-bit integer registers.
+
+// A9WriteLM variants expand into a pair of writes for each 64-bit
+// value loaded. When the number of registers is odd, the last
+// A9WriteLnHi is naturally ignored because the instruction has no
+// following def operands. These variants take no issue resource, so
+// they may need to be part of a WriteSequence that includes A9WriteIssue.
+def A9WriteLM : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi,
+ A9WriteL8, A9WriteL8Hi]>,
+ // For unknown LDMs, define the maximum number of writes, but only
+ // make the first two consume resources.
+ SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3Hi, A9WriteL3Hi,
+ A9WriteL4Hi, A9WriteL4Hi,
+ A9WriteL5Hi, A9WriteL5Hi,
+ A9WriteL6Hi, A9WriteL6Hi,
+ A9WriteL7Hi, A9WriteL7Hi,
+ A9WriteL8Hi, A9WriteL8Hi]>]> {
+ let Variadic = 1;
+}
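+// A sketch of the variadic mapping: an LDM of three registers (three
+// 32-bit addresses) matches A9LMAdr3Pred; its defs take A9WriteL1,
+// A9WriteL1Hi and A9WriteL2 (latencies 3, 3 and 4), and the remaining
+// writes are dropped for lack of def operands.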
+
+//===----------------------------------------------------------------------===//
+// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
+
+// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources,
+// so it can be used in WriteSequences for single-issue instructions that
+// encapsulate multiple loads.
+def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+
+foreach NumAddr = 1-8 in {
+
+// Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
+def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
+
+// A9WriteLfp1-8 definitions are statically expanded into a sequence of
+// A9WriteLfpOps with additive latency that takes a single issue slot.
+// Used directly to describe NEON VLDn.
+def A9WriteLfp#NumAddr : WriteSequence<
+ [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+// A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
+// permuting loaded values.
+def A9WriteLfp#NumAddr#Mov : WriteSequence<
+ [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+} // foreach NumAddr
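+// For example, A9WriteLfp2 unrolls to [A9WriteIssue, A9WriteLfpOp,
+// A9WriteLfpOp]: one issue slot plus two zero-uop LS/FP loads with
+// additive latency.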
+
+// Define VLDM/VSTM PreRA resources.
+// A9WriteLMfpPreRA is dynamically expanded into the correct
+// A9WriteLfp1-8 sequence based on a predicate. This supports the
+// preRA VLDM variants in which all 64-bit loads are written to the
+// same tuple of either single- or double-precision registers.
+def A9WriteLMfpPreRA : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
+ // For unknown VLDM/VSTM PreRA, assume 2xS registers.
+ SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
+
+// Define VLDM/VSTM PostRA Resources.
+// A9WriteLMfpLo takes an LS and an FP resource and one issue slot, but
+// adds no latency.
+def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
+
+foreach NumAddr = 1-8 in {
+
+// Each A9WriteLMfp#N variant adds N cycles of latency without consuming
+// additional resources.
+def A9WriteLMfp#NumAddr : WriteSequence<
+ [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+
+// Assuming aligned access, the upper half of each pair is free with
+// the same latency.
+def A9WriteLMfp#NumAddr#Hi : WriteSequence<
+ [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
+
+} // foreach NumAddr
+
+// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
+// pair of writes for each 64-bit value loaded. When the number of
+// registers is odd, the last A9WriteLMfp#NHi is naturally ignored because
+// the instruction has no following def operands.
+def A9WriteLMfpPostRA : SchedWriteVariant<[
+ SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>,
+ SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi]>,
+ SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi]>,
+ SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi]>,
+ SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi]>,
+ SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi,
+ A9WriteLMfp6, A9WriteLMfp6Hi]>,
+ SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi,
+ A9WriteLMfp6, A9WriteLMfp6Hi,
+ A9WriteLMfp7, A9WriteLMfp7Hi]>,
+ SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3, A9WriteLMfp3Hi,
+ A9WriteLMfp4, A9WriteLMfp4Hi,
+ A9WriteLMfp5, A9WriteLMfp5Hi,
+ A9WriteLMfp6, A9WriteLMfp6Hi,
+ A9WriteLMfp7, A9WriteLMfp7Hi,
+ A9WriteLMfp8, A9WriteLMfp8Hi]>,
+ // For unknown LDMs, define the maximum number of writes, but only
+ // make the first two consume resources.
+ SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi,
+ A9WriteLMfp2, A9WriteLMfp2Hi,
+ A9WriteLMfp3Hi, A9WriteLMfp3Hi,
+ A9WriteLMfp4Hi, A9WriteLMfp4Hi,
+ A9WriteLMfp5Hi, A9WriteLMfp5Hi,
+ A9WriteLMfp6Hi, A9WriteLMfp6Hi,
+ A9WriteLMfp7Hi, A9WriteLMfp7Hi,
+ A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
+ let Variadic = 1;
+}
+
+// Distinguish between our multiple MI-level forms of the same
+// VLDM/VSTM instructions.
+def A9PreRA : SchedPredicate<
+ "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
+def A9PostRA : SchedPredicate<
+ "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;
+
+// VLDM represents all destination registers as a single register
+// tuple, unlike LDM, so the number of write operands is not variadic.
+def A9WriteLMfp : SchedWriteVariant<[
+ SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
+ SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
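+// For example, a VLDM whose destination tuple is still a virtual
+// register matches A9PreRA and takes the single-write PreRA variant;
+// after register allocation the physical-register form matches
+// A9PostRA and expands into per-64-bit-value writes.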
+
+//===----------------------------------------------------------------------===//
+// Resources for other (non LDM/VLDM) Variants.
+
+// These mov immediate writers are unconditionally expanded with
+// additive latency.
+def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>;
+def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
+
+// Some ALU operations can read loaded integer values one cycle early.
+def A9ReadA : SchedReadAdvance<1,
+ [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
+ A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
+ A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
+ A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
+ A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
+
+// Read types for operands that are unconditionally read in cycle N
+// after the instruction issues; these decrease producer latency by N-1.
+def A9Read2 : SchedReadAdvance<1>;
+def A9Read3 : SchedReadAdvance<2>;
+def A9Read4 : SchedReadAdvance<3>;
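+// For example, an A9WriteV6 producer (latency 6) feeding an operand
+// tagged A9Read3 sees an effective latency of 6 - 2 = 4.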
+
+//===----------------------------------------------------------------------===//
+// Map itinerary classes to scheduler read/write resources per operand.
+//
+// For ARM, we piggyback scheduler resources on the Itinerary classes
+// to avoid perturbing the existing instruction definitions.
+
+// This table follows the ARM Cortex-A9 Technical Reference Manuals,
+// mostly in order.
+let SchedModel = CortexA9Model in {
+
+def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
+ IIC_iMVNi,IIC_iMVNsi,
+ IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
+def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
+
+def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
+def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
+def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
+
+def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>;
+def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>;
+
+// A9WriteMHi is ignored for MUL32.
+def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
+ IIC_iMUL64,IIC_iMAC64]>;
+// FIXME: SMLALxx needs itin classes
+def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
+
+// TODO: For floating-point ops, we model the pipeline forwarding
+// latencies here. WAW latencies are sometimes longer.
+
+def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
+ IIC_fpUNA32, IIC_fpUNA64,
+ IIC_fpCMP32, IIC_fpCMP64]>;
+def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
+def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
+ IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
+ IIC_fpALU32, IIC_fpALU64]>;
+def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
+def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
+def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
+def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
+def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
+def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
+def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
+def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
+
+def :ItinRW<[A9WriteB], [IIC_Br]>;
+
+// A9 PLD is processed in a dedicated unit.
+def :ItinRW<[], [IIC_Preload]>;
+
+// Note: We must assume that loads are aligned, since the machine
+// model cannot know this statically and A9 ignores alignment hints.
+
+// A9WriteAdr consumes the AGU regardless of address writeback, but its
+// latency is only relevant for users of an updated address.
+def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
+ IIC_iLoad_iu,IIC_iLoad_ru]>;
+def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
+def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
+ IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
+def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
+def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
+ IIC_iLoad_d_ru]>;
+// Store either has no def operands, or the one def for address writeback.
+def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
+ IIC_iStore_iu, IIC_iStore_ru,
+ IIC_iStore_d_i, IIC_iStore_d_r,
+ IIC_iStore_d_ru]>;
+def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
+ IIC_iStore_bh_i, IIC_iStore_bh_r,
+ IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
+def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
+
+// A9WriteLM will be expanded into a separate write for each def
+// operand. Address generation consumes resources, but A9WriteLMAdr
+// is listed after all def operands, so it has no effective latency.
+//
+// Note: A9WriteLM expands into an even number of writes. The actual
+// number of def operands may be one fewer.
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
+
+// Load multiple with address writeback has an extra def operand in
+// front of the loaded registers.
+//
+// Reuse the load-multiple variants for store-multiple because the
+// resources are identical. For stores, only the address writeback
+// has a def operand, so the WriteL latencies are unused.
+def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
+ IIC_iStore_m,
+ IIC_iStore_mu]>;
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>;
+
+def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
+
+def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
+def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
+def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
+ IIC_fpStore_m, IIC_fpStore_mu]>;
+
+// Note: Unlike VLDM, VLD1 expects the writeback operand after the
+// normal writes.
+def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
+ IIC_VLD1x2, IIC_VLD1x2u]>;
+def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
+ IIC_VLD1x4, IIC_VLD1x4u,
+ IIC_VLD4dup, IIC_VLD4dupu]>;
+def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
+ IIC_VLD2, IIC_VLD2u,
+ IIC_VLD2dup, IIC_VLD2dupu]>;
+def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
+ IIC_VLD2x2, IIC_VLD2x2u,
+ IIC_VLD2ln, IIC_VLD2lnu]>;
+def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
+ IIC_VLD3dup, IIC_VLD3dupu]>;
+def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
+ IIC_VLD4ln, IIC_VLD4lnu]>;
+def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
+
+// Vector stores use similar resources to vector loads, so use the
+// same write types. The address write must be first for stores with
+// address writeback.
+def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
+ IIC_VST1x2, IIC_VST1x2u,
+ IIC_VST1ln, IIC_VST1lnu,
+ IIC_VST2, IIC_VST2u,
+ IIC_VST2x2, IIC_VST2x2u,
+ IIC_VST2ln, IIC_VST2lnu]>;
+def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
+ IIC_VST1x4, IIC_VST1x4u,
+ IIC_VST3, IIC_VST3u,
+ IIC_VST3ln, IIC_VST3lnu,
+ IIC_VST4, IIC_VST4u,
+ IIC_VST4ln, IIC_VST4lnu]>;
+
+// NEON moves.
+def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
+def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
+def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
+
+// NEON integer arithmetic
+//
+// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
+// VSUB/VMVN/VCLSD/VCLZD/VCNTD
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
+// VADDL/VSUBL/VNEG are mapped later under IIC_VSHLi.
+// ...
+// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
+// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
+def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
+// VQNEG/VQABS
+def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
+// VABS
+def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
+// VPADD/VPADDL are mapped later under IIC_VSHLi.
+// ...
+// VCLSQ/VCLZQ/VCNTQ take two cycles.
+def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
+// VMOVimm/VMVNimm/VORRimm/VBICimm
+def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
+def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
+def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
+
+// NEON integer multiply
+//
+// Note: these don't quite match the timing docs, but they do match
+// the original A9 itinerary.
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
+def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
+def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
+def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
+def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
+def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
+def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
+def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
+
+// NEON integer shift
+// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
+def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
+def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
+
+// NEON permute
+def :ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
+def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
+ [IIC_VPERMQ3, IIC_VEXTQ]>;
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
+ [IIC_VTBX4]>;
+// NEON floating-point
+def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
+def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
+def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
+def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
+def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
+def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
+def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
new file mode 100644
index 0000000..e9bc3e0
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -0,0 +1,1085 @@
+//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Swift processor.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. It is
+// required until the SelectionDAG and PostRA schedulers are replaced
+// by the MachineScheduler.
+
+def SW_DIS0 : FuncUnit;
+def SW_DIS1 : FuncUnit;
+def SW_DIS2 : FuncUnit;
+
+def SW_ALU0 : FuncUnit;
+def SW_ALU1 : FuncUnit;
+def SW_LS : FuncUnit;
+def SW_IDIV : FuncUnit;
+def SW_FDIV : FuncUnit;
+
+// FIXME: Need bypasses.
+// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
+// IIC_iMOVix2ld better.
+// FIXME: Model the special immediate shifts that are not microcoded.
+// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
+// to issue on pipe 1?
+// FIXME: Model the pipelined behavior of CMP / TST instructions.
+// FIXME: Better model the microcode stages of multiply instructions, especially
+// conditional variants.
+// FIXME: Add preload instruction when it is documented.
+// FIXME: Model non-pipelined nature of FP div / sqrt unit.
+
+def SwiftItineraries : ProcessorItineraries<
+ [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
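+  //
+  // (The trailing list gives per-operand cycles, def operands first,
+  // then uses: e.g. [2, 1, 1] means the result is ready in cycle 2
+  // and both sources are read in cycle 1.)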
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_LS]>],
+ [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 3>,
+ InstrStage<1, [SW_ALU0]>],
+ [5, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 6, 1, 1]>,
+ //
+ // Integer divide
+ InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 0>,
+ InstrStage<14, [SW_IDIV]>],
+ [14, 1, 1]>,
+
+ // Integer load pipeline
+  // FIXME: These timings are rough approximations.
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
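+  // (A trailing micro-op count of -1 marks a variable number of
+  // micro-ops, resolved per instruction at scheduling time; hence
+  // the "dynamic uops" tags.)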
+
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [4, 1]>,
+
+ // Integer store pipeline
+  //
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [], [], -1>, // dynamic uops
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [2], [], -1>, // dynamic uops
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
+
+ // Branch
+ //
+  // No delay slots, so the latency of a branch is unimportant.
+ InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ //
+ // Most floating-point moves get issued on ALU0.
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 4>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // FP Load Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Store Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
+ // NEON
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+  // Quad-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Count
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Integer to Lane Move
+ // FIXME: I think this is correct, but it is not clear from the tuning guide.
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Double-register FP Unary
+  // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Quad-register FP Unary
+  // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP Binary
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+  // Double-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Quad-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Double-register Fused FP Multiply-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Quad-register Fused FP Multiply-Accumulate
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+  // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute (3 cycle issue on A9)
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simple machine model that
+// will replace itineraries.
+
+// Swift machine model for scheduling and other instruction cost heuristics.
+def SwiftModel : SchedMachineModel {
+ let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
+ let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let LoadLatency = 3;
+
+ let Itineraries = SwiftItineraries;
+}
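+
+// A processor definition elsewhere (e.g. in ARM.td) attaches this model;
+// a sketch, with the feature list elided:
+//   def : ProcessorModel<"swift", SwiftModel, [/* Swift features */]>;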
+
+// TODO: Add Swift processor and scheduler resources.
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 31d5d38..41a7e0c 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "arm-selectiondag-info"
#include "ARMTargetMachine.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
@@ -155,7 +155,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
TargetLowering::ArgListEntry Entry;
// First argument: data pointer
- Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
Entry.Node = Dst;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 89e29ad..058d4c4 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#include "ARMSubtarget.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -31,6 +32,10 @@ static cl::opt<bool>
DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
static cl::opt<bool>
+UseFusedMulOps("arm-use-mulops",
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
@@ -49,6 +54,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasVFPv4(false)
, HasNEON(false)
, UseNEONForSinglePrecisionFP(false)
+ , UseMulOps(UseFusedMulOps)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
@@ -63,10 +69,12 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasFP16(false)
, HasD16(false)
, HasHardwareDivide(false)
+ , HasHardwareDivideInARM(false)
, HasT2ExtractPack(false)
, HasDataBarrier(false)
, Pref32BitThumb(false)
, AvoidCPSRPartialUpdate(false)
+ , AvoidMOVsShifterOperand(false)
, HasRAS(false)
, HasMPExtension(false)
, FPOnlySP(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index b394061..64878cd 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -15,9 +15,9 @@
#define ARMSUBTARGET_H
#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -30,7 +30,7 @@ class StringRef;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA8, CortexA9
+ Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift
};
  /// ARMProcFamily - ARM processor family: Cortex-A5, Cortex-A8, Cortex-A9,
  /// Cortex-A15, Cortex-R5, Swift, and others.
@@ -57,6 +57,10 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// UseMulOps - True if non-microcoded fused integer multiply-add and
+ /// multiply-subtract instructions should be used.
+ bool UseMulOps;
+
/// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
/// whether the FP VML[AS] instructions are slow (if so, don't use them).
bool SlowFPVMLx;
@@ -107,6 +111,9 @@ protected:
/// HasHardwareDivide - True if subtarget supports [su]div
bool HasHardwareDivide;
+ /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
+ bool HasHardwareDivideInARM;
+
/// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
/// instructions.
bool HasT2ExtractPack;
@@ -124,6 +131,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate;
+  /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag-setting
+  /// movs with a shifter operand (e.g. asr, lsl, lsr).
+ bool AvoidMOVsShifterOperand;
+
  /// HasRAS - Some processors perform return stack prediction. CodeGen should
  /// avoid issuing "normal" call instructions to callees that do not return.
bool HasRAS;
@@ -197,9 +208,14 @@ protected:
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
+ bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+ bool isCortexA15() const { return ARMProcFamily == CortexA15; }
+ bool isSwift() const { return ARMProcFamily == Swift; }
bool isCortexM3() const { return CPUString == "cortex-m3"; }
+ bool isLikeA9() const { return isCortexA9() || isCortexA15(); }
+ bool isCortexR5() const { return ARMProcFamily == CortexR5; }
bool hasARMOps() const { return !NoARM; }
@@ -211,14 +227,17 @@ protected:
return hasNEON() && UseNEONForSinglePrecisionFP; }
bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
+ bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
@@ -231,7 +250,7 @@ protected:
bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetNaCl() const {
- return TargetTriple.getOS() == Triple::NativeClient;
+ return TargetTriple.getOS() == Triple::NaCl;
}
bool isTargetELF() const { return !isTargetDarwin(); }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 171c9ad..7745218 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,11 +11,11 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetMachine.h"
-#include "ARMFrameLowering.h"
#include "ARM.h"
-#include "llvm/PassManager.h"
+#include "ARMFrameLowering.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -51,6 +51,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
this->Options.FloatABIType = FloatABI::Soft;
}
+void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our ARM pass. This
+ // allows the ARM pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createARMTargetTransformInfoPass(this));
+}
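+
+// Illustrative usage (assumed driver code): the analyses are registered once
+// per pass manager before anything that queries TargetTransformInfo runs:
+//   PassManager PM;
+//   TM->addAnalysisPasses(PM); // BasicTTI first, then the ARM-specific TTI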
+
+
void ARMTargetMachine::anchor() { }
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
@@ -60,7 +69,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL)
: ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
InstrInfo(Subtarget),
- DataLayout(Subtarget.isAPCS_ABI() ?
+ DL(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
Subtarget.isAAPCS_ABI() ?
@@ -68,7 +77,6 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
"v128:64:128-v64:64:64-n32-S64") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"v128:64:128-v64:64:64-n32-S32")),
- ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget) {
@@ -88,7 +96,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
- DataLayout(Subtarget.isAPCS_ABI() ?
+ DL(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"i16:16:32-i8:8:32-i1:8:32-"
"v128:32:128-v64:32:64-a:0:32-n32-S32") :
@@ -99,7 +107,6 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-"
"v128:64:128-v64:64:64-a:0:32-n32-S32")),
- ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
@@ -143,6 +150,11 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+
+ const ARMSubtarget *Subtarget = &getARMSubtarget();
+ if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() &&
+ TM->Options.EnableFastISel)
+ addPass(createARMGlobalBaseRegPass());
return false;
}
@@ -150,7 +162,7 @@ bool ARMPassConfig::addPreRegAlloc() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
addPass(createARMLoadStoreOptimizationPass(true));
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
addPass(createMLxExpansionPass());
return true;
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index abcdb24..be6bec7 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,20 +14,19 @@
#ifndef ARMTARGETMACHINE_H
#define ARMTARGETMACHINE_H
-#include "ARMInstrInfo.h"
-#include "ARMELFWriterInfo.h"
#include "ARMFrameLowering.h"
-#include "ARMJITInfo.h"
-#include "ARMSubtarget.h"
#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
#include "ARMSelectionDAGInfo.h"
-#include "Thumb1InstrInfo.h"
+#include "ARMSubtarget.h"
#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -51,6 +50,9 @@ public:
return &InstrItins;
}
+ /// \brief Register ARM analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
@@ -62,8 +64,7 @@ public:
class ARMTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
ARMInstrInfo InstrInfo;
- const TargetData DataLayout; // Calculates type size & alignment
- ARMELFWriterInfo ELFWriterInfo;
+ const DataLayout DL; // Calculates type size & alignment
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
@@ -88,12 +89,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
virtual const ARMFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
-
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetData *getTargetData() const { return &DataLayout; }
- virtual const ARMELFWriterInfo *getELFWriterInfo() const {
- return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
- }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
};
/// ThumbTargetMachine - Thumb target machine.
@@ -104,8 +101,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
// Either Thumb1InstrInfo or Thumb2InstrInfo.
OwningPtr<ARMBaseInstrInfo> InstrInfo;
- const TargetData DataLayout; // Calculates type size & alignment
- ARMELFWriterInfo ELFWriterInfo;
+ const DataLayout DL; // Calculates type size & alignment
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
@@ -138,10 +134,7 @@ public:
virtual const ARMFrameLowering *getFrameLowering() const {
return FrameLowering.get();
}
- virtual const TargetData *getTargetData() const { return &DataLayout; }
- virtual const ARMELFWriterInfo *getELFWriterInfo() const {
- return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
- }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 3d85ca7..dfdf6ab 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -9,12 +9,14 @@
#include "ARMTargetObjectFile.h"
#include "ARMSubtarget.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
using namespace dwarf;
@@ -38,3 +40,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
+
+const MCExpr *ARMElfTargetObjectFile::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
+
+ return MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ MCSymbolRefExpr::VK_ARM_TARGET2,
+ getContext());
+}
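+
+// Illustrative effect: for a typeinfo symbol such as _ZTIi, the type table
+// entry is emitted as
+//   .word _ZTIi(target2)
+// i.e. with an R_ARM_TARGET2 relocation, whose concrete semantics (absolute
+// or GOT-relative) are left to the platform ABI.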
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c6a7261..7f60727 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -28,6 +28,11 @@ public:
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ const MCExpr *
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
+
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
new file mode 100644
index 0000000..03a23be
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -0,0 +1,124 @@
+//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// ARM target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armtti"
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeARMTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class ARMTTI : public ImmutablePass, public TargetTransformInfo {
+ const ARMBaseTargetMachine *TM;
+ const ARMSubtarget *ST;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ ARMTTI() : ImmutablePass(ID), TM(0), ST(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ ARMTTI(const ARMBaseTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()) {
+ initializeARMTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti",
+ "ARM Target Transform Info", true, true, false)
+char ARMTTI::ID = 0;
+
+ImmutablePass *
+llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) {
+ return new ARMTTI(TM);
+}
+
+
+unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned Bits = Ty->getPrimitiveSizeInBits();
+ if (Bits == 0 || Bits > 32)
+ return 4;
+
+ int32_t SImmVal = Imm.getSExtValue();
+ uint32_t ZImmVal = Imm.getZExtValue();
+ if (!ST->isThumb()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getSOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else if (ST->isThumb2()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else /*Thumb1*/ {
+ if (SImmVal >= 0 && SImmVal < 256)
+ return 1;
+ if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
+ return 2;
+ // Load from constantpool.
+ return 3;
+ }
+ return 2;
+}
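+
+// Illustrative query (assumed caller; Int32Ty stands in for
+// Type::getInt32Ty(Ctx)):
+//   const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+//   unsigned Cost = TTI.getIntImmCost(APInt(32, 0xDEADBEEF), Int32Ty);
+//   // 0xDEADBEEF is not a (possibly inverted) SOImm, so in ARM mode this
+//   // yields 2 with movw/movt (v6T2) and 3 otherwise.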
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
deleted file mode 100644
index fda8536..0000000
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/ARMBaseInfo.h"
-
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-
-#include "llvm/Support/TargetRegistry.h"
-
-#include "llvm/ADT/StringSwitch.h"
-
-#include <string>
-#include <map>
-
-using namespace llvm;
-
-namespace {
-
-class ARMBaseAsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- const AsmToken &lexDefinite() {
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenUAL();
-protected:
- typedef std::map <std::string, unsigned> rmap_ty;
-
- rmap_ty RegisterMap;
-
- void InitRegisterMap(const MCRegisterInfo *info) {
- unsigned numRegs = info->getNumRegs();
-
- for (unsigned i = 0; i < numRegs; ++i) {
- const char *regName = info->getName(i);
- if (regName)
- RegisterMap[regName] = i;
- }
- }
-
- unsigned MatchRegisterName(StringRef Name) {
- rmap_ty::iterator iter = RegisterMap.find(Name.str());
- if (iter != RegisterMap.end())
- return iter->second;
- else
- return 0;
- }
-
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenUAL();
- }
- }
-public:
- ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI) {
- }
-};
-
-class ARMAsmLexer : public ARMBaseAsmLexer {
-public:
- ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
-};
-
-class ThumbAsmLexer : public ARMBaseAsmLexer {
-public:
- ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
-};
-
-} // end anonymous namespace
-
-AsmToken ARMBaseAsmLexer::LexTokenUAL() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default: break;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- break;
- case AsmToken::Identifier: {
- std::string lowerCase = lexedToken.getString().lower();
-
- unsigned regID = MatchRegisterName(lowerCase);
- // Check for register aliases.
- // r13 -> sp
- // r14 -> lr
- // r15 -> pc
- // ip -> r12
- // FIXME: Some assemblers support lots of others. Do we want them all?
- if (!regID) {
- regID = StringSwitch<unsigned>(lowerCase)
- .Case("r13", ARM::SP)
- .Case("r14", ARM::LR)
- .Case("r15", ARM::PC)
- .Case("ip", ARM::R12)
- .Default(0);
- }
-
- if (regID)
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- }
- }
-
- return AsmToken(lexedToken);
-}
-
-extern "C" void LLVMInitializeARMAsmLexer() {
- RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget);
- RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
-}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index e1e2f6e..ad37a21 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,31 +7,31 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -178,7 +178,8 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
+ SMLoc &EndLoc);
// Asm Match Converter Methods
void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
@@ -253,21 +254,17 @@ public:
// Implementation of the MCTargetAsmParser interface:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
- bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirective(AsmToken DirectiveID);
unsigned checkTargetMatchPredicate(MCInst &Inst);
- bool MatchAndEmitInstruction(SMLoc IDLoc,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out);
-
- unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- unsigned OperandNum, unsigned &NumMCOperands) {
- return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, NumMCOperands);
- }
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
};
} // end anonymous namespace
@@ -487,7 +484,8 @@ public:
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
-
+ /// getLocRange - Get the range between the first and last token of this
+ /// operand.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
ARMCC::CondCodes getCondCode() const {
@@ -863,7 +861,7 @@ public:
bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
bool isToken() const { return Kind == k_Token; }
bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
- bool isMemory() const { return Kind == k_Memory; }
+ bool isMem() const { return Kind == k_Memory; }
bool isShifterImm() const { return Kind == k_ShifterImmediate; }
bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; }
bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; }
@@ -874,14 +872,14 @@ public:
return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift;
}
bool isMemNoOffset(bool alignOK = false) const {
- if (!isMemory())
+ if (!isMem())
return false;
// No offset of any kind.
return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
(alignOK || Memory.Alignment == 0);
}
bool isMemPCRelImm12() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Base register must be PC.
if (Memory.BaseRegNum != ARM::PC)
@@ -895,7 +893,7 @@ public:
return isMemNoOffset(true);
}
bool isAddrMode2() const {
- if (!isMemory() || Memory.Alignment != 0) return false;
+ if (!isMem() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return true;
// Immediate offset in range [-4095, 4095].
@@ -917,7 +915,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMemory() || Memory.Alignment != 0) return false;
+ if (!isMem() || Memory.Alignment != 0) return false;
// No shifts are legal for AM3.
if (Memory.ShiftType != ARM_AM::no_shift) return false;
// Check for register offset.
@@ -947,7 +945,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMemory() || Memory.Alignment != 0) return false;
+ if (!isMem() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return false;
// Immediate offset in range [-1020, 1020] and a multiple of 4.
@@ -957,25 +955,25 @@ public:
Val == INT32_MIN;
}
bool isMemTBB() const {
- if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
return false;
return true;
}
bool isMemTBH() const {
- if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 ||
Memory.Alignment != 0 )
return false;
return true;
}
bool isMemRegOffset() const {
- if (!isMemory() || !Memory.OffsetRegNum || Memory.Alignment != 0)
+ if (!isMem() || !Memory.OffsetRegNum || Memory.Alignment != 0)
return false;
return true;
}
bool isT2MemRegOffset() const {
- if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.Alignment != 0)
return false;
// Only lsl #{0, 1, 2, 3} allowed.
@@ -988,14 +986,14 @@ public:
bool isMemThumbRR() const {
// Thumb reg+reg addressing is simple. Just two registers, a base and
// an offset. No shifts, negations or any other complicating factors.
- if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
return false;
return isARMLowRegister(Memory.BaseRegNum) &&
(!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum));
}
bool isMemThumbRIs4() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
!isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
return false;
// Immediate offset, multiple of 4 in range [0, 124].
@@ -1004,7 +1002,7 @@ public:
return Val >= 0 && Val <= 124 && (Val % 4) == 0;
}
bool isMemThumbRIs2() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
!isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
return false;
// Immediate offset, multiple of 4 in range [0, 62].
@@ -1013,7 +1011,7 @@ public:
return Val >= 0 && Val <= 62 && (Val % 2) == 0;
}
bool isMemThumbRIs1() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
!isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 31].
@@ -1022,7 +1020,7 @@ public:
return Val >= 0 && Val <= 31;
}
bool isMemThumbSPI() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 ||
+ if (!isMem() || Memory.OffsetRegNum != 0 ||
Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0)
return false;
// Immediate offset, multiple of 4 in range [0, 1020].
@@ -1036,7 +1034,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset a multiple of 4 in range [-1020, 1020].
if (!Memory.OffsetImm) return true;
@@ -1045,7 +1043,7 @@ public:
return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN;
}
bool isMemImm0_1020s4Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset a multiple of 4 in range [0, 1020].
if (!Memory.OffsetImm) return true;
@@ -1053,7 +1051,7 @@ public:
return Val >= 0 && Val <= 1020 && (Val & 3) == 0;
}
bool isMemImm8Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Base reg of PC isn't allowed for these encodings.
if (Memory.BaseRegNum == ARM::PC) return false;
@@ -1063,7 +1061,7 @@ public:
return (Val == INT32_MIN) || (Val > -256 && Val < 256);
}
bool isMemPosImm8Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 255].
if (!Memory.OffsetImm) return true;
@@ -1071,7 +1069,7 @@ public:
return Val >= 0 && Val < 256;
}
bool isMemNegImm8Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Base reg of PC isn't allowed for these encodings.
if (Memory.BaseRegNum == ARM::PC) return false;
@@ -1081,7 +1079,7 @@ public:
return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
bool isMemUImm12Offset() const {
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 4095].
if (!Memory.OffsetImm) return true;
@@ -1095,7 +1093,7 @@ public:
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [-4095, 4095].
if (!Memory.OffsetImm) return true;
@@ -2453,8 +2451,8 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
RegNo = tryParseRegister();
- EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -2543,6 +2541,8 @@ int ARMAsmParser::tryParseShiftRegister(
if (!PrevOp->isReg())
return Error(PrevOp->getStartLoc(), "shift must be of a register");
int SrcReg = PrevOp->getReg();
+
+ SMLoc EndLoc;
int64_t Imm = 0;
int ShiftReg = 0;
if (ShiftTy == ARM_AM::rrx) {
@@ -2557,7 +2557,7 @@ int ARMAsmParser::tryParseShiftRegister(
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
- if (getParser().ParseExpression(ShiftExpr)) {
+ if (getParser().ParseExpression(ShiftExpr, EndLoc)) {
Error(ImmLoc, "invalid immediate shift value");
return -1;
}
@@ -2582,8 +2582,9 @@ int ARMAsmParser::tryParseShiftRegister(
if (Imm == 0)
ShiftTy = ARM_AM::lsl;
} else if (Parser.getTok().is(AsmToken::Identifier)) {
- ShiftReg = tryParseRegister();
SMLoc L = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
+ ShiftReg = tryParseRegister();
if (ShiftReg == -1) {
Error (L, "expected immediate or register in shift operand");
return -1;
@@ -2598,10 +2599,10 @@ int ARMAsmParser::tryParseShiftRegister(
if (ShiftReg && ShiftTy != ARM_AM::rrx)
Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
ShiftReg, Imm,
- S, Parser.getTok().getLoc()));
+ S, EndLoc));
else
Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm,
- S, Parser.getTok().getLoc()));
+ S, EndLoc));
return 0;
}
@@ -2615,12 +2616,13 @@ int ARMAsmParser::tryParseShiftRegister(
/// parse for a specific register type.
bool ARMAsmParser::
tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &RegTok = Parser.getTok();
int RegNo = tryParseRegister();
if (RegNo == -1)
return true;
- Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(),
+ RegTok.getEndLoc()));
const AsmToken &ExclaimTok = Parser.getTok();
if (ExclaimTok.is(AsmToken::Exclaim)) {
@@ -2644,10 +2646,10 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!MCE)
return TokError("immediate value expected for vector index");
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
@@ -2797,7 +2799,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Check for and consume the closing '}'
if (Parser.getTok().isNot(AsmToken::RCurly))
return MatchOperand_ParseFail;
- SMLoc E = Parser.getTok().getLoc();
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the '}'
Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
@@ -2894,10 +2896,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
Parser.Lex(); // Eat the minus.
- SMLoc EndLoc = Parser.getTok().getLoc();
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1)
- return Error(EndLoc, "register expected");
+ return Error(AfterMinusLoc, "register expected");
// Allow Q regs and just interpret them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
EndReg = getDRegFromQReg(EndReg) + 1;
@@ -2907,10 +2909,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
continue;
// The register must be in the same register class as the first.
if (!RC->contains(EndReg))
- return Error(EndLoc, "invalid register in register list");
+ return Error(AfterMinusLoc, "invalid register in register list");
// Ranges must go from low to high.
if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
- return Error(EndLoc, "bad range in register list");
+ return Error(AfterMinusLoc, "bad range in register list");
// Add all the registers in the range to the register list.
while (Reg != EndReg) {
@@ -2958,9 +2960,9 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
}
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RCurly))
- return Error(E, "'}' expected");
+ return Error(Parser.getTok().getLoc(), "'}' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat '}' token.
// Push the register list operand.
@@ -2977,13 +2979,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Helper function to parse the lane index for vector lists.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
Index = 0; // Always return a defined index value.
if (Parser.getTok().is(AsmToken::LBrac)) {
Parser.Lex(); // Eat the '['.
if (Parser.getTok().is(AsmToken::RBrac)) {
// "Dn[]" is the 'all lanes' syntax.
LaneKind = AllLanes;
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
return MatchOperand_Success;
}
@@ -3008,6 +3011,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
Error(Parser.getTok().getLoc(), "']' expected");
return MatchOperand_ParseFail;
}
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
int64_t Val = CE->getValue();
@@ -3034,21 +3038,19 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
  // (without enclosing curly braces) as a single or double entry list,
// respectively.
if (Parser.getTok().is(AsmToken::Identifier)) {
+ SMLoc E = Parser.getTok().getEndLoc();
int Reg = tryParseRegister();
if (Reg == -1)
return MatchOperand_NoMatch;
- SMLoc E = Parser.getTok().getLoc();
if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
if (Res != MatchOperand_Success)
return Res;
switch (LaneKind) {
case NoLanes:
- E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
break;
case AllLanes:
- E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
S, E));
break;
@@ -3062,18 +3064,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
if (Res != MatchOperand_Success)
return Res;
switch (LaneKind) {
case NoLanes:
- E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
break;
case AllLanes:
- E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
@@ -3114,7 +3114,9 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
++Reg;
++Count;
}
- if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+
+ SMLoc E;
+ if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success)
return MatchOperand_ParseFail;
while (Parser.getTok().is(AsmToken::Comma) ||
@@ -3128,10 +3130,10 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat the minus.
- SMLoc EndLoc = Parser.getTok().getLoc();
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1) {
- Error(EndLoc, "register expected");
+ Error(AfterMinusLoc, "register expected");
return MatchOperand_ParseFail;
}
// Allow Q regs and just interpret them as the two D sub-registers.
@@ -3143,24 +3145,24 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
continue;
// The register must be in the same register class as the first.
if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
- Error(EndLoc, "invalid register in register list");
+ Error(AfterMinusLoc, "invalid register in register list");
return MatchOperand_ParseFail;
}
// Ranges must go from low to high.
if (Reg > EndReg) {
- Error(EndLoc, "bad range in register list");
+ Error(AfterMinusLoc, "bad range in register list");
return MatchOperand_ParseFail;
}
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(EndLoc, "mismatched lane index in register list");
+ Error(AfterMinusLoc, "mismatched lane index in register list");
return MatchOperand_ParseFail;
}
- EndLoc = Parser.getTok().getLoc();
// Add all the registers in the range to the register list.
Count += EndReg - Reg;
@@ -3199,11 +3201,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
- SMLoc EndLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ SMLoc LaneLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(EndLoc, "mismatched lane index in register list");
+ Error(LaneLoc, "mismatched lane index in register list");
return MatchOperand_ParseFail;
}
continue;
@@ -3224,7 +3227,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
SMLoc EndLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
Error(EndLoc, "mismatched lane index in register list");
@@ -3232,11 +3235,11 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
}
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RCurly)) {
- Error(E, "'}' expected");
+ Error(Parser.getTok().getLoc(), "'}' expected");
return MatchOperand_ParseFail;
}
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat '}' token.
switch (LaneKind) {
@@ -3377,7 +3380,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
StringRef Mask = Tok.getString();
if (isMClass()) {
@@ -3527,7 +3531,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
const MCExpr *ShiftAmount;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
+ SMLoc EndLoc;
+ if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3542,7 +3547,7 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
return MatchOperand_ParseFail;
}
- Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc));
return MatchOperand_Success;
}
@@ -3552,7 +3557,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
- Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
int Val = StringSwitch<int>(Tok.getString())
@@ -3562,12 +3567,12 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the token.
if (Val == -1) {
- Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
getContext()),
- S, Parser.getTok().getLoc()));
+ S, Tok.getEndLoc()));
return MatchOperand_Success;
}
@@ -3603,16 +3608,17 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
- SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
- Error(E, "malformed shift expression");
+ SMLoc EndLoc;
+ if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed shift expression");
return MatchOperand_ParseFail;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
if (!CE) {
- Error(E, "shift amount must be an immediate");
+ Error(ExLoc, "shift amount must be an immediate");
return MatchOperand_ParseFail;
}
@@ -3620,25 +3626,24 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isASR) {
// Shift amount must be in [1,32]
if (Val < 1 || Val > 32) {
- Error(E, "'asr' shift amount must be in range [1,32]");
+ Error(ExLoc, "'asr' shift amount must be in range [1,32]");
return MatchOperand_ParseFail;
}
// asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
if (isThumb() && Val == 32) {
- Error(E, "'asr #32' shift amount not allowed in Thumb mode");
+ Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode");
return MatchOperand_ParseFail;
}
if (Val == 32) Val = 0;
} else {
        // Shift amount must be in [0,31]
if (Val < 0 || Val > 31) {
- Error(E, "'lsr' shift amount must be in range [0,31]");
+ Error(ExLoc, "'lsr' shift amount must be in range [0,31]");
return MatchOperand_ParseFail;
}
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E));
+ Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc));
return MatchOperand_Success;
}
@@ -3664,16 +3669,17 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
- SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
- Error(E, "malformed rotate expression");
+ SMLoc EndLoc;
+ if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed rotate expression");
return MatchOperand_ParseFail;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
if (!CE) {
- Error(E, "rotate amount must be an immediate");
+ Error(ExLoc, "rotate amount must be an immediate");
return MatchOperand_ParseFail;
}
@@ -3682,12 +3688,11 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// normally, zero is represented in asm by omitting the rotate operand
// entirely.
if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
- Error(E, "'ror' rotate amount must be 8, 16, or 24");
+ Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24");
return MatchOperand_ParseFail;
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateRotImm(Val, S, E));
+ Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc));
return MatchOperand_Success;
}
@@ -3737,7 +3742,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat hash token.
const MCExpr *WidthExpr;
- if (getParser().ParseExpression(WidthExpr)) {
+ SMLoc EndLoc;
+ if (getParser().ParseExpression(WidthExpr, EndLoc)) {
Error(E, "malformed immediate expression");
return MatchOperand_ParseFail;
}
@@ -3753,9 +3759,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(E, "'width' operand must be in the range [1,32-lsb]");
return MatchOperand_ParseFail;
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E));
+ Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc));
return MatchOperand_Success;
}
@@ -3774,7 +3779,6 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
bool haveEaten = false;
bool isAdd = true;
- int Reg = -1;
if (Tok.is(AsmToken::Plus)) {
Parser.Lex(); // Eat the '+' token.
haveEaten = true;
@@ -3783,15 +3787,15 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
isAdd = false;
haveEaten = true;
}
- if (Parser.getTok().is(AsmToken::Identifier))
- Reg = tryParseRegister();
+
+ SMLoc E = Parser.getTok().getEndLoc();
+ int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
return MatchOperand_NoMatch;
Error(Parser.getTok().getLoc(), "register expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Parser.getTok().getLoc();
ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
unsigned ShiftImm = 0;
@@ -3799,6 +3803,9 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the ','.
if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
return MatchOperand_ParseFail;
+
+ // FIXME: Only approximates end...may include intervening whitespace.
+ E = Parser.getTok().getLoc();
}
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
@@ -3831,14 +3838,14 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// differently.
bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *Offset;
- if (getParser().ParseExpression(Offset))
+ SMLoc E;
+ if (getParser().ParseExpression(Offset, E))
return MatchOperand_ParseFail;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
if (!CE) {
Error(S, "constant expression expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Tok.getLoc();
// Negative zero is encoded as the flag value INT32_MIN.
int32_t Val = CE->getValue();
if (isNegative && Val == 0)
@@ -3853,7 +3860,6 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool haveEaten = false;
bool isAdd = true;
- int Reg = -1;
if (Tok.is(AsmToken::Plus)) {
Parser.Lex(); // Eat the '+' token.
haveEaten = true;
@@ -3862,18 +3868,18 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
isAdd = false;
haveEaten = true;
}
- if (Parser.getTok().is(AsmToken::Identifier))
- Reg = tryParseRegister();
+
+ Tok = Parser.getTok();
+ int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
return MatchOperand_NoMatch;
- Error(Parser.getTok().getLoc(), "register expected");
+ Error(Tok.getLoc(), "register expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
- 0, S, E));
+ 0, S, Tok.getEndLoc()));
return MatchOperand_Success;
}
@@ -4226,7 +4232,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(Tok.getLoc(), "malformed memory operand");
if (Tok.is(AsmToken::RBrac)) {
- E = Tok.getLoc();
+ E = Tok.getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
@@ -4274,9 +4280,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
// Don't worry about range checking the value here. That's handled by
@@ -4323,9 +4329,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
CE = MCConstantExpr::Create(INT32_MIN, getContext());
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
// Don't worry about range checking the value here. That's handled by
@@ -4369,9 +4375,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
@@ -4439,6 +4445,12 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) ||
((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32))
return Error(Loc, "immediate shift value out of range");
+ // If <ShiftTy> #0, turn it into a no_shift.
+ if (Imm == 0)
+ St = ARM_AM::lsl;
+ // For consistency, treat lsr #32 and asr #32 as having immediate value 0.
+ if (Imm == 32)
+ Imm = 0;
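+    // Illustrative results of this normalization: "lsl #0" is carried as no
+    // shift at all, while "lsr #32" stays lsr with immediate value 0 (its
+    // hardware encoding).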
Amount = Imm;
}
@@ -4616,7 +4628,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return true;
const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
- getContext());
+ getContext());
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
return false;
@@ -4951,7 +4963,8 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
/// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
  // Apply mnemonic aliases before doing anything else, as the destination
  // mnemonic may include suffixes and we want to handle them normally.
@@ -5182,6 +5195,45 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
+ // Adjust operands of ldrexd/strexd to MCK_GPRPair.
+  // ldrexd/strexd require an even/odd GPR pair. To enforce this constraint,
+  // a single GPRPair reg operand is used in the .td file to replace the two
+  // GPRs. However, when parsing from asm, the two GPRs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (!isThumb() && Operands.size() > 4 &&
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd")) {
+ bool isLoad = (Mnemonic == "ldrexd");
+ unsigned Idx = isLoad ? 2 : 3;
+ ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
+ ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
+
+ const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
+ // Adjust only if Op1 and Op2 are GPRs.
+ if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) &&
+ MRC.contains(Op2->getReg())) {
+ unsigned Reg1 = Op1->getReg();
+ unsigned Reg2 = Op2->getReg();
+ unsigned Rt = MRI->getEncodingValue(Reg1);
+ unsigned Rt2 = MRI->getEncodingValue(Reg2);
+
+ // Rt2 must be Rt + 1 and Rt must be even.
+ if (Rt + 1 != Rt2 || (Rt & 1)) {
+ Error(Op2->getStartLoc(), isLoad ?
+ "destination operands must be sequential" :
+ "source operands must be sequential");
+ return true;
+ }
+ unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
+ &(MRI->getRegClass(ARM::GPRPairRegClassID)));
+ Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2);
+ Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg(
+ NewReg, Op1->getStartLoc(), Op2->getEndLoc()));
+ delete Op1;
+ delete Op2;
+ }
+ }
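+  // Illustrative examples of the constraint (hypothetical input):
+  //   ldrexd r0, r1, [r2]  @ ok: r0 even, r1 == r0 + 1 -> merged to one GPRPair
+  //   ldrexd r1, r2, [r3]  @ error: "destination operands must be sequential"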
+
return false;
}
@@ -5269,8 +5321,7 @@ validateInstruction(MCInst &Inst,
switch (Inst.getOpcode()) {
case ARM::LDRD:
case ARM::LDRD_PRE:
- case ARM::LDRD_POST:
- case ARM::LDREXD: {
+ case ARM::LDRD_POST: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
@@ -5289,8 +5340,7 @@ validateInstruction(MCInst &Inst,
return false;
}
case ARM::STRD_PRE:
- case ARM::STRD_POST:
- case ARM::STREXD: {
+ case ARM::STRD_POST: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
@@ -5665,9 +5715,28 @@ bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
+ // Alias for alternate form of 'ADR Rd, #imm' instruction.
+ case ARM::ADDri: {
+ if (Inst.getOperand(1).getReg() != ARM::PC ||
+ Inst.getOperand(5).getReg() != 0)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADR);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
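+  // Illustrative: "add r0, pc, #8" (ADDri with Rn == PC and no cc_out) is
+  // rewritten here to the equivalent "adr r0, #8".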
// Aliases for alternate PC+imm syntax of LDR instructions.
case ARM::t2LDRpcrel:
- Inst.setOpcode(ARM::t2LDRpci);
+ // Select the narrow version if the immediate will fit.
+ if (Inst.getOperand(1).getImm() > 0 &&
+ Inst.getOperand(1).getImm() <= 0xff)
+ Inst.setOpcode(ARM::tLDRpci);
+ else
+ Inst.setOpcode(ARM::t2LDRpci);
return true;
case ARM::t2LDRBpcrel:
Inst.setOpcode(ARM::t2LDRBpci);
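
A note on the t2LDRpcrel change above: the alias now picks the 16-bit encoding whenever the pc-relative offset is positive and fits the narrow immediate field. A hedged sketch of that selection, with hypothetical names standing in for the real ARM:: opcodes:

    #include <cassert>
    #include <cstdint>

    enum Opcode { tLDRpci, t2LDRpci }; // stand-ins for the opcode enum

    // Mirrors the check above: positive offsets up to 0xff take the narrow form.
    static Opcode selectPcRelLoad(int64_t Imm) {
      if (Imm > 0 && Imm <= 0xff)
        return tLDRpci;  // 16-bit Thumb encoding
      return t2LDRpci;   // 32-bit Thumb2 encoding
    }

    int main() {
      assert(selectPcRelLoad(4) == tLDRpci);
      assert(selectPcRelLoad(0x100) == t2LDRpci);
      assert(selectPcRelLoad(-4) == t2LDRpci); // negative offsets need the wide form
      return 0;
    }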
@@ -7458,15 +7527,15 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
static const char *getSubtargetFeatureName(unsigned Val);
bool ARMAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
- unsigned Kind;
- unsigned ErrorInfo;
unsigned MatchResult;
- MatchResult = MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo);
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
switch (MatchResult) {
default: break;
case Match_Success:
@@ -7768,13 +7837,10 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
return true;
}
-extern "C" void LLVMInitializeARMAsmLexer();
-
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget);
RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget);
- LLVMInitializeARMAsmLexer();
}
#define GET_REGISTER_MATCHER
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index e24a1b1..d2012c3 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -1,7 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARMAsmParser
- ARMAsmLexer.cpp
ARMAsmParser.cpp
)
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index ac916cc..586834c 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -11,7 +11,6 @@ tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel)
tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv)
tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
@@ -22,7 +21,6 @@ add_llvm_target(ARMCodeGen
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
- ARMELFWriterInfo.cpp
ARMExpandPseudoInsts.cpp
ARMFastISel.cpp
ARMFrameLowering.cpp
@@ -39,6 +37,7 @@ add_llvm_target(ARMCodeGen
ARMSubtarget.cpp
ARMTargetMachine.cpp
ARMTargetObjectFile.cpp
+ ARMTargetTransformInfo.cpp
MLxExpansionPass.cpp
Thumb1FrameLowering.cpp
Thumb1InstrInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 57642e1..31a3b0b 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -9,21 +9,20 @@
#define DEBUG_TYPE "arm-disassembler"
+#include "llvm/MC/MCDisassembler.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMBaseInfo.h"
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCExpr.h"
+#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -105,10 +104,6 @@ public:
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const;
-
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
-private:
};
/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
@@ -131,8 +126,6 @@ public:
raw_ostream &vStream,
raw_ostream &cStream) const;
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
private:
mutable ITStatus ITBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
@@ -385,7 +378,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
-#include "ARMGenEDInfo.inc"
static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
return new ARMDisassembler(STI);
@@ -395,14 +387,6 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge
return new ThumbDisassembler(STI);
}
-const EDInstInfo *ARMDisassembler::getEDInfo() const {
- return instInfoARM;
-}
-
-const EDInstInfo *ThumbDisassembler::getEDInfo() const {
- return instInfoARM;
-}
-
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
const MemoryObject &Region,
uint64_t Address,
@@ -525,8 +509,9 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
else
ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
const char *ReferenceName;
- const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
- &ReferenceName);
+ uint64_t SymbolValue = 0x00000000ffffffffULL & Value;
+ const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType,
+ Address, &ReferenceName);
if (Name) {
SymbolicOp.AddSymbol.Name = Name;
SymbolicOp.AddSymbol.Present = true;
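
The masking added above matters because Value arrives as a sign-extended 64-bit quantity while the symbol lookup wants a 32-bit address. A small self-contained illustration of the truncation:

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Displacement = -8;
      uint64_t Widened = (uint64_t)(int64_t)Displacement; // 0xfffffffffffffff8
      uint64_t SymbolValue = 0x00000000ffffffffULL & Widened;
      assert(SymbolValue == 0xfffffff8ULL); // high 32 bits stripped
      return 0;
    }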
@@ -1280,7 +1265,13 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
unsigned lsb = fieldFromInstruction(Val, 0, 5);
DecodeStatus S = MCDisassembler::Success;
- if (lsb > msb) Check(S, MCDisassembler::SoftFail);
+ if (lsb > msb) {
+ Check(S, MCDisassembler::SoftFail);
+    // The check above will emit the warning about the "potentially undefined
+    // instruction encoding", but we can't build a bad MCOperand value here
+    // with an lsb > msb or else printing the MCInst will crash.
+ lsb = msb;
+ }
uint32_t msb_mask = 0xFFFFFFFF;
if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
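
To see why clamping lsb is enough, here is the mask construction this decoder feeds into the bitfield operand, pulled out into a standalone helper (the final inversion to the stored bf_inv_mask form is omitted):

    #include <cassert>
    #include <cstdint>

    // Bits [msb:lsb] set, everything else clear; mirrors the code above.
    static uint32_t bitfieldMask(unsigned msb, unsigned lsb) {
      uint32_t msb_mask = 0xFFFFFFFF;
      if (msb != 31) msb_mask = (1U << (msb + 1)) - 1;
      uint32_t lsb_mask = (1U << lsb) - 1;
      return msb_mask & ~lsb_mask;
    }

    int main() {
      assert(bitfieldMask(7, 4) == 0x000000F0u);
      assert(bitfieldMask(31, 0) == 0xFFFFFFFFu);
      assert(bitfieldMask(5, 5) == 0x00000020u); // lsb == msb: single bit
      return 0;
    }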
@@ -1523,6 +1514,8 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
unsigned amt = fieldFromInstruction(Insn, 7, 5);
+ if (Opc == ARM_AM::ror && amt == 0)
+ Opc = ARM_AM::rrx;
unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
Inst.addOperand(MCOperand::CreateImm(imm));
@@ -1564,6 +1557,9 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
break;
}
+ if (ShOp == ARM_AM::ror && imm == 0)
+ ShOp = ARM_AM::rrx;
+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
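
Both hunks above apply the same architectural rule: these encodings have no distinct RRX shift type, so a rotate-right by zero means RRX. A toy normalization, with a hypothetical enum in place of ARM_AM::ShiftOpc:

    #include <cassert>

    enum ShiftOpc { lsl, lsr, asr, ror, rrx }; // stand-in for ARM_AM::ShiftOpc

    static ShiftOpc normalizeShift(ShiftOpc Op, unsigned Amt) {
      if (Op == ror && Amt == 0)
        return rrx; // "ror #0" is the encoding of rrx
      return Op;
    }

    int main() {
      assert(normalizeShift(ror, 0) == rrx);
      assert(normalizeShift(ror, 3) == ror);
      assert(normalizeShift(lsl, 0) == lsl);
      return 0;
    }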
@@ -2089,16 +2085,28 @@ static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
static DecodeStatus
DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- DecodeStatus S = MCDisassembler::Success;
- unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) |
- (fieldFromInstruction(Insn, 11, 1) << 18) |
- (fieldFromInstruction(Insn, 13, 1) << 17) |
- (fieldFromInstruction(Insn, 16, 6) << 11) |
- (fieldFromInstruction(Insn, 26, 1) << 19);
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
+ DecodeStatus Status = MCDisassembler::Success;
+
+  // Note that the J1 and J2 values come from the encoded instruction, so
+  // convert them to I1 and I2 values as documented:
+ // I1 = NOT(J1 EOR S);
+ // I2 = NOT(J2 EOR S);
+ // and build the imm32 with one trailing zero as documented:
+ // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+ unsigned S = fieldFromInstruction(Insn, 26, 1);
+ unsigned J1 = fieldFromInstruction(Insn, 13, 1);
+ unsigned J2 = fieldFromInstruction(Insn, 11, 1);
+ unsigned I1 = !(J1 ^ S);
+ unsigned I2 = !(J2 ^ S);
+ unsigned imm10 = fieldFromInstruction(Insn, 16, 10);
+ unsigned imm11 = fieldFromInstruction(Insn, 0, 11);
+ unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11;
+  int imm32 = SignExtend32<25>(tmp << 1);
+ if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
- return S;
+ Inst.addOperand(MCOperand::CreateImm(imm32));
+
+ return Status;
}
static DecodeStatus
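
The DecodeT2BInstruction rewrite is worth a worked example: imm32 is the 25-bit quantity S:I1:I2:imm10:imm11:'0', sign-extended. The sketch below re-derives it outside LLVM, with signExtend25 playing the role of SignExtend32<25>:

    #include <cassert>
    #include <cstdint>

    static int32_t signExtend25(uint32_t x) {
      return (int32_t)(x << 7) >> 7; // keep the low 25 bits, sign-extended
    }

    static int32_t decodeT2BranchImm(unsigned S, unsigned J1, unsigned J2,
                                     unsigned imm10, unsigned imm11) {
      unsigned I1 = !(J1 ^ S);
      unsigned I2 = !(J2 ^ S);
      unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11;
      return signExtend25(tmp << 1);
    }

    int main() {
      assert(decodeT2BranchImm(0, 1, 1, 0, 2) == 4);          // short forward branch
      assert(decodeT2BranchImm(1, 1, 1, 0x3FF, 0x7FF) == -2); // short backward branch
      return 0;
    }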
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 8b9109e..d48b37e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -13,11 +13,11 @@
#define DEBUG_TYPE "asm-printer"
#include "ARMInstPrinter.h"
-#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/MC/MCInst.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
@@ -29,11 +29,33 @@ using namespace llvm;
///
/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
static unsigned translateShiftImm(unsigned imm) {
+  // lsr #32 and asr #32 exist, but should be encoded with an immediate of 0.
+ assert((imm & ~0x1f) == 0 && "Invalid shift encoding");
+
if (imm == 0)
return 32;
return imm;
}
+/// Print a shift operand together with its immediate shift amount.
+static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
+ unsigned ShImm, bool UseMarkup) {
+ if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
+ return;
+ O << ", ";
+
+  assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
+ O << getShiftOpcStr(ShOpc);
+
+ if (ShOpc != ARM_AM::rrx) {
+ O << " ";
+ if (UseMarkup)
+ O << "<imm:";
+ O << "#" << translateShiftImm(ShImm);
+ if (UseMarkup)
+ O << ">";
+ }
+}
ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
@@ -45,7 +67,9 @@ ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
}
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+ OS << markup("<reg:")
+ << getRegisterName(RegNo)
+ << markup(">");
}
void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
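
The markup() calls threaded through the rest of this file are the printer's operand-annotation mechanism: when markup output is enabled, register, immediate, and memory operands get machine-parsable tags; otherwise the wrappers vanish. A toy version of the idea (UseMarkup is a real flag on MCInstPrinter; everything else here is simplified, including the two-argument markup overload used elsewhere):

    #include <iostream>

    static bool UseMarkup = true; // the real flag lives on MCInstPrinter

    static const char *markup(const char *Tag) {
      return UseMarkup ? Tag : "";
    }

    int main() {
      // Prints "<reg:r0>, <imm:#42>" with markup, "r0, #42" without.
      std::cout << markup("<reg:") << "r0" << markup(">") << ", "
                << markup("<imm:") << "#42" << markup(">") << "\n";
      return 0;
    }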
@@ -85,10 +109,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printSBitModifierOperand(MI, 6, O);
printPredicateOperand(MI, 4, O);
- O << '\t' << getRegisterName(Dst.getReg())
- << ", " << getRegisterName(MO1.getReg());
+ O << '\t';
+ printRegName(O, Dst.getReg());
+ O << ", ";
+ printRegName(O, MO1.getReg());
- O << ", " << getRegisterName(MO2.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
printAnnotation(O, Annot);
return;
@@ -104,15 +131,20 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printSBitModifierOperand(MI, 5, O);
printPredicateOperand(MI, 3, O);
- O << '\t' << getRegisterName(Dst.getReg())
- << ", " << getRegisterName(MO1.getReg());
+ O << '\t';
+ printRegName(O, Dst.getReg());
+ O << ", ";
+ printRegName(O, MO1.getReg());
if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
printAnnotation(O, Annot);
return;
}
- O << ", #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+ O << ", "
+ << markup("<imm:")
+ << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()))
+ << markup(">");
printAnnotation(O, Annot);
return;
}
@@ -136,7 +168,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
MI->getOperand(3).getImm() == -4) {
O << '\t' << "push";
printPredicateOperand(MI, 4, O);
- O << "\t{" << getRegisterName(MI->getOperand(1).getReg()) << "}";
+ O << "\t{";
+ printRegName(O, MI->getOperand(1).getReg());
+ O << "}";
printAnnotation(O, Annot);
return;
}
@@ -159,7 +193,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
MI->getOperand(4).getImm() == 4) {
O << '\t' << "pop";
printPredicateOperand(MI, 5, O);
- O << "\t{" << getRegisterName(MI->getOperand(0).getReg()) << "}";
+ O << "\t{";
+ printRegName(O, MI->getOperand(0).getReg());
+ O << "}";
printAnnotation(O, Annot);
return;
}
@@ -198,7 +234,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << "\tldm";
printPredicateOperand(MI, 1, O);
- O << '\t' << getRegisterName(BaseReg);
+ O << '\t';
+ printRegName(O, BaseReg);
if (Writeback) O << "!";
O << ", ";
printRegisterList(MI, 3, O);
@@ -215,6 +252,35 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
+  // Combine 2 GPRs from the disassembler into a GPRPair to match the
+  // instruction definition. ldrexd/strexd require an even/odd GPR pair. To
+  // enforce this constraint, a single GPRPair reg operand is used in the
+  // .td file to replace the two GPRs. However, when decoding, the two GPRs
+  // cannot be automatically expressed as a GPRPair, so we manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) {
+ const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
+ bool isStore = Opcode == ARM::STREXD;
+ unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
+ if (MRC.contains(Reg)) {
+ MCInst NewMI;
+ MCOperand NewReg;
+ NewMI.setOpcode(Opcode);
+
+ if (isStore)
+ NewMI.addOperand(MI->getOperand(0));
+ NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0,
+ &MRI.getRegClass(ARM::GPRPairRegClassID)));
+ NewMI.addOperand(NewReg);
+
+      // Copy the remaining operands into NewMI.
+      for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+ NewMI.addOperand(MI->getOperand(i));
+ printInstruction(&NewMI, O);
+ return;
+ }
+ }
+
printInstruction(MI, O);
printAnnotation(O, Annot);
}
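
printGPRPairOperand (added later in this file) performs the matching split: it recovers the two halves of the pair with getSubReg(..., gsub_0/gsub_1), so ldrexd r0, r1, [r2] round-trips even though the MCInst carries a single pair register. A toy model of the merge/split pairing, assuming a contiguous numbering instead of the tablegen-generated enums:

    #include <cassert>

    // Toy numbering: pair k covers GPRs 2k and 2k+1. The real code uses
    // getMatchingSuperReg(gsub_0) to merge and getSubReg(gsub_0/gsub_1) to split.
    static unsigned mergeToPair(unsigned EvenGPR) { return EvenGPR / 2; }
    static unsigned subReg0(unsigned Pair) { return Pair * 2; }
    static unsigned subReg1(unsigned Pair) { return Pair * 2 + 1; }

    int main() {
      unsigned Pair = mergeToPair(4); // r4 -> the hypothetical r4_r5 pair
      assert(subReg0(Pair) == 4 && subReg1(Pair) == 5);
      return 0;
    }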
@@ -224,9 +290,11 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
- O << getRegisterName(Reg);
+ printRegName(O, Reg);
} else if (Op.isImm()) {
- O << '#' << Op.getImm();
+ O << markup("<imm:")
+ << '#' << formatImm(Op.getImm())
+ << markup(">");
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
// If a symbolic branch target was added as a constant expression then print
@@ -244,13 +312,16 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
}
-void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
if (MO1.isExpr())
O << *MO1.getExpr();
- else if (MO1.isImm())
- O << "[pc, #" << MO1.getImm() << "]";
+ else if (MO1.isImm()) {
+ O << markup("<mem:") << "[pc, "
+ << markup("<imm:") << "#" << formatImm(MO1.getImm())
+ << markup(">]>", "]");
+ }
else
llvm_unreachable("Unknown LDR label operand?");
}
@@ -266,7 +337,7 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
const MCOperand &MO3 = MI->getOperand(OpNum+2);
- O << getRegisterName(MO1.getReg());
+ printRegName(O, MO1.getReg());
// Print the shift opc.
ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
@@ -274,7 +345,8 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
if (ShOpc == ARM_AM::rrx)
return;
- O << ' ' << getRegisterName(MO2.getReg());
+ O << ' ';
+ printRegName(O, MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
}
@@ -283,14 +355,11 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << getRegisterName(MO1.getReg());
+ printRegName(O, MO1.getReg());
// Print the shift opc.
- ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
- O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
- if (ShOpc == ARM_AM::rrx)
- return;
- O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+ printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
+ ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
@@ -304,67 +373,51 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
if (!MO2.getReg()) {
- if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
- O << ", #"
+ if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
+ O << ", "
+ << markup("<imm:")
+ << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << ARM_AM::getAM2Offset(MO3.getImm());
- O << "]";
- return;
- }
-
- O << ", "
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << getRegisterName(MO2.getReg());
-
- if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
- << " #" << ShImm;
- O << "]";
-}
-
-void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
-
- O << "[" << getRegisterName(MO1.getReg()) << "], ";
-
- if (!MO2.getReg()) {
- unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm());
- O << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << ImmOffs;
+ << ARM_AM::getAM2Offset(MO3.getImm())
+ << markup(">");
+ }
+ O << "]" << markup(">");
return;
}
- O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << getRegisterName(MO2.getReg());
+ O << ", ";
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()));
+ printRegName(O, MO2.getReg());
- if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
- << " #" << ShImm;
+ printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()),
+ ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup);
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
- O << "[" << getRegisterName(MO1.getReg()) << ", "
- << getRegisterName(MO2.getReg()) << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
- O << "[" << getRegisterName(MO1.getReg()) << ", "
- << getRegisterName(MO2.getReg()) << ", lsl #1]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
+ O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
@@ -376,13 +429,13 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
return;
}
+#ifndef NDEBUG
const MCOperand &MO3 = MI->getOperand(Op+2);
unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
+ assert(IdxMode != ARMII::IndexModePost &&
+ "Should be pre or offset index op");
+#endif
- if (IdxMode == ARMII::IndexModePost) {
- printAM2PostIndexOp(MI, Op, O);
- return;
- }
printAM2PreOrOffsetIndexOp(MI, Op, O);
}
@@ -394,19 +447,18 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- O << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << ImmOffs;
+ O << markup("<imm:")
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+ << ImmOffs
+ << markup(">");
return;
}
- O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << getRegisterName(MO1.getReg());
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()));
+ printRegName(O, MO1.getReg());
- if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
- << " #" << ShImm;
+ printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO2.getImm()),
+ ARM_AM::getAM2Offset(MO2.getImm()), UseMarkup);
}
//===--------------------------------------------------------------------===//
@@ -419,18 +471,22 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- O << "[" << getRegisterName(MO1.getReg()) << "], ";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << "], " << markup(">");
if (MO2.getReg()) {
- O << (char)ARM_AM::getAM3Op(MO3.getImm())
- << getRegisterName(MO2.getReg());
+ O << (char)ARM_AM::getAM3Op(MO3.getImm());
+ printRegName(O, MO2.getReg());
return;
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
- O << '#'
+ O << markup("<imm:")
+ << '#'
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
- << ImmOffs;
+ << ImmOffs
+ << markup(">");
}
void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
@@ -439,23 +495,29 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- O << '[' << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << '[';
+ printRegName(O, MO1.getReg());
if (MO2.getReg()) {
- O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
- << getRegisterName(MO2.getReg()) << ']';
+ O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()));
+ printRegName(O, MO2.getReg());
+ O << ']' << markup(">");
return;
}
- //If the op is sub we have to print the immediate even if it is 0
+  // If the op is sub, we have to print the immediate even if it is 0.
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
-
- if (ImmOffs || (op == ARM_AM::sub))
- O << ", #"
+
+ if (ImmOffs || (op == ARM_AM::sub)) {
+ O << ", "
+ << markup("<imm:")
+ << "#"
<< ARM_AM::getAddrOpcStr(op)
- << ImmOffs;
- O << ']';
+ << ImmOffs
+ << markup(">");
+ }
+ O << ']' << markup(">");
}
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
@@ -483,15 +545,15 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
if (MO1.getReg()) {
- O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
- << getRegisterName(MO1.getReg());
+ O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()));
+ printRegName(O, MO1.getReg());
return;
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- O << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
- << ImmOffs;
+ O << markup("<imm:")
+ << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
+ << markup(">");
}
void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
@@ -499,7 +561,9 @@ void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff);
+ O << markup("<imm:")
+ << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
+ << markup(">");
}
void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
@@ -507,7 +571,8 @@ void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << (MO2.getImm() ? "" : "-") << getRegisterName(MO1.getReg());
+ O << (MO2.getImm() ? "" : "-");
+ printRegName(O, MO1.getReg());
}
void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
@@ -515,7 +580,9 @@ void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2);
+ O << markup("<imm:")
+ << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
+ << markup(">");
}
@@ -536,16 +603,20 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
if (ImmOffs || Op == ARM_AM::sub) {
- O << ", #"
+ O << ", "
+ << markup("<imm:")
+ << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
- << ImmOffs * 4;
+ << ImmOffs * 4
+ << markup(">");
}
- O << "]";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
@@ -553,18 +624,21 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
if (MO2.getImm()) {
// FIXME: Both darwin as and GNU as violate ARM docs here.
O << ", :" << (MO2.getImm() << 3);
}
- O << "]";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- O << "[" << getRegisterName(MO1.getReg()) << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
@@ -573,8 +647,10 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.getReg() == 0)
O << "!";
- else
- O << ", " << getRegisterName(MO.getReg());
+ else {
+ O << ", ";
+ printRegName(O, MO.getReg());
+ }
}
void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
@@ -585,7 +661,9 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
int32_t lsb = CountTrailingZeros_32(v);
int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
- O << '#' << lsb << ", #" << width;
+ O << markup("<imm:") << '#' << lsb << markup(">")
+ << ", "
+ << markup("<imm:") << '#' << width << markup(">");
}
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
@@ -599,10 +677,18 @@ void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
unsigned ShiftOp = MI->getOperand(OpNum).getImm();
bool isASR = (ShiftOp & (1 << 5)) != 0;
unsigned Amt = ShiftOp & 0x1f;
- if (isASR)
- O << ", asr #" << (Amt == 0 ? 32 : Amt);
- else if (Amt)
- O << ", lsl #" << Amt;
+ if (isASR) {
+ O << ", asr "
+ << markup("<imm:")
+ << "#" << (Amt == 0 ? 32 : Amt)
+ << markup(">");
+ }
+ else if (Amt) {
+ O << ", lsl "
+ << markup("<imm:")
+ << "#" << Amt
+ << markup(">");
+ }
}
void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
@@ -611,7 +697,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
if (Imm == 0)
return;
assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
- O << ", lsl #" << Imm;
+ O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">");
}
void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
@@ -621,7 +707,7 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
if (Imm == 0)
Imm = 32;
assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
- O << ", asr #" << Imm;
+ O << ", asr " << markup("<imm:") << "#" << Imm << markup(">");
}
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
@@ -629,11 +715,20 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
O << "{";
for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
if (i != OpNum) O << ", ";
- O << getRegisterName(MI->getOperand(i).getReg());
+ printRegName(O, MI->getOperand(i).getReg());
}
O << "}";
}
+void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
+ O << ", ";
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
+}
+
void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -803,23 +898,29 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
int32_t OffImm = (int32_t)MO.getImm();
+ O << markup("<imm:");
if (OffImm == INT32_MIN)
O << "#-0";
else if (OffImm < 0)
O << "#-" << -OffImm;
else
O << "#" << OffImm;
+ O << markup(">");
}
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "#" << MI->getOperand(OpNum).getImm() * 4;
+ O << markup("<imm:")
+ << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
+ << markup(">");
}
void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << "#" << (Imm == 0 ? 32 : Imm);
+ O << markup("<imm:")
+ << "#" << formatImm((Imm == 0 ? 32 : Imm))
+ << markup(">");
}
void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
@@ -849,10 +950,13 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
- if (unsigned RegNum = MO2.getReg())
- O << ", " << getRegisterName(RegNum);
- O << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (unsigned RegNum = MO2.getReg()) {
+ O << ", ";
+ printRegName(O, RegNum);
+ }
+ O << "]" << markup(">");
}
void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
@@ -867,10 +971,15 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
- if (unsigned ImmOffs = MO2.getImm())
- O << ", #" << ImmOffs * Scale;
- O << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (unsigned ImmOffs = MO2.getImm()) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << formatImm(ImmOffs * Scale)
+ << markup(">");
+ }
+ O << "]" << markup(">");
}
void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
@@ -906,14 +1015,12 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
unsigned Reg = MO1.getReg();
- O << getRegisterName(Reg);
+ printRegName(O, Reg);
// Print the shift opc.
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
- O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
- if (ShOpc != ARM_AM::rrx)
- O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+ printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
+ ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
@@ -926,18 +1033,27 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
bool isSub = OffImm < 0;
// Special value for #-0. All others are normal.
if (OffImm == INT32_MIN)
OffImm = 0;
- if (isSub)
- O << ", #-" << -OffImm;
- else if (OffImm > 0)
- O << ", #" << OffImm;
- O << "]";
+ if (isSub) {
+ O << ", "
+ << markup("<imm:")
+ << "#-" << -OffImm
+ << markup(">");
+ }
+ else if (OffImm > 0) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << OffImm
+ << markup(">");
+ }
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
@@ -946,17 +1062,24 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
// Don't print +0.
+ if (OffImm != 0)
+ O << ", ";
+ if (OffImm != 0 && UseMarkup)
+ O << "<imm:";
if (OffImm == INT32_MIN)
- O << ", #-0";
+ O << "#-0";
else if (OffImm < 0)
- O << ", #-" << -OffImm;
+ O << "#-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm;
- O << "]";
+ O << "#" << OffImm;
+ if (OffImm != 0 && UseMarkup)
+ O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
@@ -970,20 +1093,27 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
return;
}
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm();
assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
// Don't print +0.
+ if (OffImm != 0)
+ O << ", ";
+ if (OffImm != 0 && UseMarkup)
+ O << "<imm:";
if (OffImm == INT32_MIN)
- O << ", #-0";
+ O << "#-0";
else if (OffImm < 0)
- O << ", #-" << -OffImm;
+ O << "#-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm;
- O << "]";
+ O << "#" << OffImm;
+ if (OffImm != 0 && UseMarkup)
+ O << ">";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
@@ -992,10 +1122,15 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
- O << "[" << getRegisterName(MO1.getReg());
- if (MO2.getImm())
- O << ", #" << MO2.getImm() * 4;
- O << "]";
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ if (MO2.getImm()) {
+ O << ", "
+ << markup("<imm:")
+ << "#" << formatImm(MO2.getImm() * 4)
+ << markup(">");
+ }
+ O << "]" << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
@@ -1003,11 +1138,12 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
- // Don't print +0.
+ O << ", " << markup("<imm:");
if (OffImm < 0)
- O << ", #-" << -OffImm;
+ O << "#-" << -OffImm;
else
- O << ", #" << OffImm;
+ O << "#" << OffImm;
+ O << markup(">");
}
void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
@@ -1019,12 +1155,18 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
// Don't print +0.
+ if (OffImm != 0)
+ O << ", ";
+ if (OffImm != 0 && UseMarkup)
+ O << "<imm:";
if (OffImm == INT32_MIN)
- O << ", #-0";
+ O << "#-0";
else if (OffImm < 0)
- O << ", #-" << -OffImm;
+ O << "#-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm;
+ O << "#" << OffImm;
+ if (OffImm != 0 && UseMarkup)
+ O << ">";
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
@@ -1034,23 +1176,30 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
const MCOperand &MO2 = MI->getOperand(OpNum+1);
const MCOperand &MO3 = MI->getOperand(OpNum+2);
- O << "[" << getRegisterName(MO1.getReg());
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
assert(MO2.getReg() && "Invalid so_reg load / store address!");
- O << ", " << getRegisterName(MO2.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
unsigned ShAmt = MO3.getImm();
if (ShAmt) {
assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
- O << ", lsl #" << ShAmt;
+ O << ", lsl "
+ << markup("<imm:")
+ << "#" << ShAmt
+ << markup(">");
}
- O << "]";
+ O << "]" << markup(">");
}
void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
- O << '#' << ARM_AM::getFPImmFloat(MO.getImm());
+ O << markup("<imm:")
+ << '#' << ARM_AM::getFPImmFloat(MO.getImm())
+ << markup(">");
}
void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1058,14 +1207,18 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- O << "#0x";
+ O << markup("<imm:")
+ << "#0x";
O.write_hex(Val);
+ O << markup(">");
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << "#" << Imm + 1;
+ O << markup("<imm:")
+ << "#" << formatImm(Imm + 1)
+ << markup(">");
}
void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1073,23 +1226,30 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
unsigned Imm = MI->getOperand(OpNum).getImm();
if (Imm == 0)
return;
- O << ", ror #";
+ O << ", ror "
+ << markup("<imm:")
+ << "#";
switch (Imm) {
default: assert (0 && "illegal ror immediate!");
case 1: O << "8"; break;
case 2: O << "16"; break;
case 3: O << "24"; break;
}
+ O << markup(">");
}
void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "#" << 16 - MI->getOperand(OpNum).getImm();
+ O << markup("<imm:")
+ << "#" << 16 - MI->getOperand(OpNum).getImm()
+ << markup(">");
}
void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "#" << 32 - MI->getOperand(OpNum).getImm();
+ O << markup("<imm:")
+ << "#" << 32 - MI->getOperand(OpNum).getImm()
+ << markup(">");
}
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
@@ -1099,7 +1259,9 @@ void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "}";
}
void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
@@ -1107,7 +1269,11 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
- O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+ O << "{";
+ printRegName(O, Reg0);
+ O << ", ";
+ printRegName(O, Reg1);
+ O << "}";
}
void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
@@ -1116,7 +1282,11 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
- O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+ O << "{";
+ printRegName(O, Reg0);
+ O << ", ";
+ printRegName(O, Reg1);
+ O << "}";
}
void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
@@ -1124,9 +1294,13 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "}";
}
void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
@@ -1134,16 +1308,23 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 3);
+ O << "}";
}
void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[]}";
}
void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
@@ -1152,7 +1333,11 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
- O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+ O << "{";
+ printRegName(O, Reg0);
+ O << "[], ";
+ printRegName(O, Reg1);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
@@ -1161,9 +1346,13 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
@@ -1172,10 +1361,15 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 1);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 3);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
@@ -1184,7 +1378,11 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
- O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+ O << "{";
+ printRegName(O, Reg0);
+ O << "[], ";
+ printRegName(O, Reg1);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
@@ -1193,9 +1391,13 @@ void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
@@ -1204,10 +1406,15 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "[], ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 6);
+ O << "[]}";
}
void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
@@ -1216,9 +1423,13 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << "}";
}
void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
@@ -1227,8 +1438,13 @@ void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}";
+ O << "{";
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 2);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 4);
+ O << ", ";
+ printRegName(O, MI->getOperand(OpNum).getReg() + 6);
+ O << "}";
}
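
One recurring pattern in the vector-list printers above deserves a note: they add small constants directly to register enum values, which is safe only because the generated enum lays out D0..D31 consecutively, as the comments say. A sketch of that assumption, with a hypothetical enum in place of the tablegen output:

    #include <cassert>

    enum DReg { D0 = 100, D1, D2, D3, D4, D5, D6, D7 }; // hypothetical layout

    int main() {
      unsigned Base = D1;
      // printVectorListFourSpaced visits Base, Base+2, Base+4, Base+6:
      assert(Base + 2 == D3 && Base + 4 == D5 && Base + 6 == D7);
      return 0;
    }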
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 73d7bfd..edff75d 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -124,9 +124,11 @@ public:
void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT
new file mode 100755
index 0000000..68afea1
--- /dev/null
+++ b/lib/Target/ARM/LICENSE.TXT
@@ -0,0 +1,47 @@
+ARM Limited
+
+Software Grant License Agreement ("Agreement")
+
+Except for the license granted herein to you, ARM Limited ("ARM") reserves all
+right, title, and interest in and to the Software (defined below).
+
+Definition
+
+"Software" means the code and documentation as well as any original work of
+authorship, including any modifications or additions to an existing work, that
+is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for
+inclusion in, or documentation of, any of the products owned or managed by LLVM
+(the "Work"). For the purposes of this definition, "submitted" means any form of
+electronic, verbal, or written communication sent to LLVM or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that are
+managed by, or on behalf of, LLVM for the purpose of discussing and improving
+the Work, but excluding communication that is conspicuously marked otherwise.
+
+1. Grant of Copyright License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants to you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable copyright license to reproduce, prepare derivative
+ works of, publicly display, publicly perform, sublicense, and distribute the
+ Software and such derivative works.
+
+2. Grant of Patent License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable (except as stated in this section) patent license
+ to make, have made, use, offer to sell, sell, import, and otherwise transfer
+ the Work, where such license applies only to those patent claims licensable
+ by ARM that are necessarily infringed by ARM's Software alone or by
+ combination of the Software with the Work to which such Software was
+ submitted. If any entity institutes patent litigation against ARM or any
+ other entity (including a cross-claim or counterclaim in a lawsuit) alleging
+ that ARM's Software, or the Work to which ARM has contributed constitutes
+ direct or contributory patent infringement, then any patent licenses granted
+ to that entity under this Agreement for the Software or Work shall terminate
+ as of the date such litigation is filed.
+
+Unless required by applicable law or agreed to in writing, the software is
+provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied, including, without limitation, any warranties or
+conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index ac6ce64..1f1b334 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -8,9 +8,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
@@ -21,7 +22,6 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
@@ -114,11 +114,15 @@ public:
MCValue &Target, uint64_t &Value,
bool &IsResolved);
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
bool mayNeedRelaxation(const MCInst &Inst) const;
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -161,7 +165,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
switch ((unsigned)Fixup.getKind()) {
case ARM::fixup_arm_thumb_br: {
@@ -216,7 +220,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
- const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0
+ const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
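
The ARMv4 NOP constant fixed above was missing a nibble: 0xe1a0000 is only 28 bits wide. The canonical 'mov r0, r0' encoding can be reassembled from its fields to check the corrected value:

    #include <cassert>
    #include <cstdint>

    int main() {
      // cond=0b1110, I=0, opc=0b1101 (MOV), S=0, Rn=0, Rd=0, shifter=r0.
      uint32_t cond = 0xEu << 28;
      uint32_t opc  = 0xDu << 21;
      uint32_t movR0R0 = cond | opc; // all remaining fields are zero
      assert(movR0R0 == 0xe1a00000u);
      return 0;
    }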
@@ -552,63 +556,6 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(void)adjustFixupValue(Fixup, Value, &Asm.getContext());
}
-namespace {
-
-// FIXME: This should be in a separate file.
-// ELF is an ELF of course...
-class ELFARMAsmBackend : public ARMAsmBackend {
-public:
- uint8_t OSABI;
- ELFARMAsmBackend(const Target &T, const StringRef TT,
- uint8_t _OSABI)
- : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARMELFObjectWriter(OS, OSABI);
- }
-};
-
-// FIXME: Raise this to share code between Darwin and ELF.
-void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value) const {
- unsigned NumBytes = 4; // FIXME: 2 for Thumb
- Value = adjustFixupValue(Fixup, Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
-
- // For each byte of the fragment that the fixup touches, mask in the bits from
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
-}
-
-// FIXME: This should be in a separate file.
-class DarwinARMAsmBackend : public ARMAsmBackend {
-public:
- const object::mach::CPUSubtypeARM Subtype;
- DarwinARMAsmBackend(const Target &T, const StringRef TT,
- object::mach::CPUSubtypeARM st)
- : ARMAsmBackend(T, TT), Subtype(st) { }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
- object::mach::CTM_ARM,
- Subtype);
- }
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
-
- virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
- return false;
- }
-};
-
/// getFixupKindNumBytes - The number of bytes the fixup may change.
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
@@ -657,8 +604,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
}
-void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value) const {
+void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
Value = adjustFixupValue(Fixup, Value);
if (!Value) return; // Doesn't change encoding.
@@ -666,15 +613,53 @@ void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned Offset = Fixup.getOffset();
assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
- // For each byte of the fragment that the fixup touches, mask in the
- // bits from the fixup value.
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
for (unsigned i = 0; i != NumBytes; ++i)
Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ uint8_t OSABI;
+ ELFARMAsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMELFObjectWriter(OS, OSABI);
+ }
+};
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ const object::mach::CPUSubtypeARM Subtype;
+ DarwinARMAsmBackend(const Target &T, const StringRef TT,
+ object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T, TT), Subtype(st) {
+ HasDataInCodeSupport = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
} // end anonymous namespace
-MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
@@ -687,6 +672,15 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
else if (TheTriple.getArchName() == "armv6" ||
TheTriple.getArchName() == "thumbv6")
return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
+ else if (TheTriple.getArchName() == "armv7f" ||
+ TheTriple.getArchName() == "thumbv7f")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F);
+ else if (TheTriple.getArchName() == "armv7k" ||
+ TheTriple.getArchName() == "thumbv7k")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K);
+ else if (TheTriple.getArchName() == "armv7s" ||
+ TheTriple.getArchName() == "thumbv7s")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S);
return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index b53da3b..9193e40 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -133,6 +133,7 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
switch (RelocType) {
default: EmitThisSym = true; break;
case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ case ELF::R_ARM_PREL31: EmitThisSym = false; break;
}
}
@@ -225,6 +226,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case FK_Data_4:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_ARM_NONE:
+ Type = ELF::R_ARM_NONE;
+ break;
case MCSymbolRefExpr::VK_ARM_GOT:
Type = ELF::R_ARM_GOT_BREL;
break;
@@ -246,7 +250,13 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_TARGET1:
Type = ELF::R_ARM_TARGET1;
break;
- }
+ case MCSymbolRefExpr::VK_ARM_TARGET2:
+ Type = ELF::R_ARM_TARGET2;
+ break;
+ case MCSymbolRefExpr::VK_ARM_PREL31:
+ Type = ELF::R_ARM_PREL31;
+ break;
+ }
break;
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
new file mode 100644
index 0000000..39ded8f
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -0,0 +1,201 @@
+//===-- ARMELFStreamer.cpp - ELF Object Output for ARM --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ARM ELF .o object files. It differs
+// from the generic ELF streamer in that it emits mapping symbols ($a, $t and
+// $d) to delimit regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf.
+///
+/// In brief: $a, $t or $d should be emitted at the start of each contiguous
+/// region of ARM code, Thumb code or data in a section. In practice, this
+/// emission does not rely on explicit assembler directives but on inherent
+/// properties of the directives doing the emission (e.g. ".byte" is data, "add
+/// r0, r0, r0" an instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
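+///
+/// Illustrative example (editor's sketch, not part of the original patch):
+/// assembling
+///   .code 32
+///   add r1, r1, r1   @ preceded by $a (start of ARM code)
+///   .code 16
+///   adds r1, r1      @ preceded by $t (start of Thumb code)
+///   .byte 0          @ preceded by $d (start of data)
+/// yields exactly one mapping symbol at the start of each contiguous run.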
+class ARMELFStreamer : public MCELFStreamer {
+public:
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb)
+ : MCELFStreamer(Context, TAB, OS, Emitter),
+ IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
+ }
+
+ ~ARMELFStreamer() {}
+
+ virtual void ChangeSection(const MCSection *Section) {
+ // We have to keep track of the mapping symbol state of any sections we
+ // use. Each one should start off as EMS_None, which is what
+ // DenseMap::lookup returns for a section it has not seen yet.
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ if (IsThumb)
+ EmitThumbMappingSymbol();
+ else
+ EmitARMMappingSymbol();
+
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ MCELFStreamer::EmitAssemblerFlag(Flag);
+
+ switch (Flag) {
+ case MCAF_SyntaxUnified:
+ return; // no-op here.
+ case MCAF_Code16:
+ IsThumb = true;
+ return; // Change to Thumb mode
+ case MCAF_Code32:
+ IsThumb = false;
+ return; // Change to ARM mode
+ case MCAF_Code64:
+ return;
+ case MCAF_SubsectionsViaSymbols:
+ return;
+ }
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_ARM,
+ EMS_Thumb,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitThumbMappingSymbol() {
+ if (LastEMS == EMS_Thumb) return;
+ EmitMappingSymbol("$t");
+ LastEMS = EMS_Thumb;
+ }
+
+ void EmitARMMappingSymbol() {
+ if (LastEMS == EMS_ARM) return;
+ EmitMappingSymbol("$a");
+ LastEMS = EMS_ARM;
+ }
+
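+ // Successive calls produce unique local symbols "$a.0", "$t.1", "$d.2" and
+ // so on, each equated to a temporary label at the current location
+ // (editor's note, describing the code below).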
+ void EmitMappingSymbol(StringRef Name) {
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ void EmitThumbFunc(MCSymbol *Func) {
+ // FIXME: Anything needed here to flag the function as thumb?
+
+ getAssembler().setIsThumbFunc(Func);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
+ SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
+ }
+
+
+ bool IsThumb;
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ /// @}
+};
+}
+
+namespace llvm {
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb) {
+ ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+
+}
+
+
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
new file mode 100644
index 0000000..77ae5d2
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_STREAMER_H
+#define ARM_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb);
+}
+
+#endif // ARM_ELF_STREAMER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 1917564..09e15af 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -12,11 +12,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -24,8 +26,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -35,8 +35,8 @@ STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
namespace {
class ARMMCCodeEmitter : public MCCodeEmitter {
- ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
const MCContext &CTX;
@@ -934,6 +934,10 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
unsigned SBits = getShiftOp(ShOp);
+ // While "lsr #32" and "asr #32" exist, they are encoded with a 0 in the shift
+ // amount. However, it would be an easy mistake to make so check here.
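+ // (Illustrative note: "ldr r0, [r1, r2, lsr #32]" must reach here with
+ // ShImm == 0, never 32.)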
+ assert((ShImm & ~0x1f) == 0 && "Out of range shift amount");
+
// {16-13} = Rn
// {12} = isAdd
// {11-0} = shifter
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 22e14a2..fc8505b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -9,8 +9,8 @@
#define DEBUG_TYPE "armmcexpr"
#include "ARMMCExpr.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
using namespace llvm;
const ARMMCExpr*
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index a727e08..b404e6c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -28,7 +28,7 @@ private:
explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
: Kind(_Kind), Expr(_Expr) {}
-
+
public:
/// @name Construction
/// @{
@@ -67,9 +67,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
-
- static bool classof(const ARMMCExpr *) { return true; }
-
};
} // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 5df84c8..f4958f3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -12,8 +12,9 @@
//===----------------------------------------------------------------------===//
#include "ARMMCTargetDesc.h"
-#include "ARMMCAsmInfo.h"
#include "ARMBaseInfo.h"
+#include "ARMELFStreamer.h"
+#include "ARMMCAsmInfo.h"
#include "InstPrinter/ARMInstPrinter.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrAnalysis.h"
@@ -71,6 +72,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
else
// Use CPU to figure out the exact features.
ARMArchFeature = "+v7";
+ } else if (Len >= Idx+2 && TT[Idx+1] == 's') {
+ if (NoCPU)
+ // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ // Swift
+ ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
} else {
// v7 CPUs have lots of different feature sets. If no CPU is specified,
// then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
@@ -136,7 +145,7 @@ static MCInstrInfo *createARMMCInstrInfo() {
static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitARMMCRegisterInfo(X, ARM::LR);
+ InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
return X;
}
@@ -178,7 +187,8 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
llvm_unreachable("ARM does not support Windows COFF format");
}
- return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
+ TheTriple.getArch() == Triple::thumb);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 510302d..a89981e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -46,7 +46,7 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT);
+MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU);
/// createARMELFObjectWriter - Construct an ELF object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 95640f7..b9efe74 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -7,17 +7,18 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
@@ -41,6 +42,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue);
+ bool requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment,
+ unsigned RelocType, const MCSymbolData *SD,
+ uint64_t FixedValue);
+
public:
ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype)
@@ -305,6 +312,46 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
Writer->addRelocation(Fragment->getParent(), MRE);
}
+bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCFragment &Fragment,
+ unsigned RelocType,
+ const MCSymbolData *SD,
+ uint64_t FixedValue) {
+ // Most cases can be identified purely from the symbol.
+ if (Writer->doesSymbolRequireExternRelocation(SD))
+ return true;
+ int64_t Value = (int64_t)FixedValue; // The displacement is signed.
+ int64_t Range;
+ switch (RelocType) {
+ default:
+ return false;
+ case macho::RIT_ARM_Branch24Bit:
+ // PC pre-adjustment of 8 for these instructions.
+ Value -= 8;
+ // ARM BL/BLX has a 25-bit offset.
+ Range = 0x1ffffff;
+ break;
+ case macho::RIT_ARM_ThumbBranch22Bit:
+ // PC pre-adjustment of 4 for these instructions.
+ Value -= 4;
+ // Thumb BL/BLX has a 24-bit offset.
+ Range = 0xffffff;
+ }
+ // BL/BLX also use external relocations when an internal relocation
+ // would result in the target being out of range. This gives the linker
+ // enough information to generate a branch island.
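+ // Worked example (editor's sketch): a same-section RIT_ARM_Branch24Bit
+ // with FixedValue 0x4000000 (64MiB) gives Value == 0x3fffff8 after the -8
+ // PC adjustment; that exceeds Range (0x1ffffff), so the check below forces
+ // an external relocation.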
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Value += Writer->getSectionAddress(&SymSD);
+ Value -= Writer->getSectionAddress(Fragment.getParent());
+ // If the resultant value would be out of range for an internal relocation,
+ // use an external instead.
+ if (Value > Range || Value < -(Range + 1))
+ return true;
+ return false;
+}
+
void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
@@ -373,7 +420,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
}
// Check whether we need an external or internal relocation.
- if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
+ FixedValue)) {
IsExtern = 1;
Index = SD->getIndex();
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index 2565994..e17eb4d 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMARMDesc
ARMAsmBackend.cpp
ARMELFObjectWriter.cpp
+ ARMELFStreamer.cpp
ARMMCAsmInfo.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index ad60e32..2e266c2 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -16,16 +16,16 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
@@ -51,7 +51,8 @@ namespace {
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
- bool isA9;
+ bool isLikeA9;
+ bool isSwift;
unsigned MIIdx;
MachineInstr* LastMIs[4];
SmallPtrSet<MachineInstr*, 4> IgnoreStall;
@@ -60,6 +61,7 @@ namespace {
void pushStack(MachineInstr *MI);
MachineInstr *getAccDefMI(MachineInstr *MI) const;
unsigned getDefReg(MachineInstr *MI) const;
+ bool hasLoopHazard(MachineInstr *MI) const;
bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
bool FindMLxHazard(MachineInstr *MI);
void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
@@ -135,6 +137,50 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
return Reg;
}
+/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
+/// a single-MBB loop.
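+///
+/// Illustrative pattern (editor's sketch, not from this patch): in a
+/// single-block loop,
+///   %acc = PHI [%init, %preheader], [%mla, %loop]
+///   %mla = VMLAS %acc, %a, %b, ...
+/// the accumulator operand traces back through the PHI to the VMLA itself,
+/// i.e. DefMI == MI once the back-edge value is followed.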
+bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+outer_continue:
+ if (DefMI->getParent() != MBB)
+ break;
+
+ if (DefMI->isPHI()) {
+ for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
+ if (DefMI->getOperand(i + 1).getMBB() == MBB) {
+ unsigned SrcReg = DefMI->getOperand(i).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DefMI = MRI->getVRegDef(SrcReg);
+ goto outer_continue;
+ }
+ }
+ }
+ } else if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ return DefMI == MI;
+}
+
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
@@ -149,6 +195,19 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
return false;
}
+static bool isFpMulInstruction(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::VMULS:
+ case ARM::VMULfd:
+ case ARM::VMULfq:
+ case ARM::VMULD:
+ case ARM::VMULslfd:
+ case ARM::VMULslfq:
+ return true;
+ default:
+ return false;
+ }
+}
bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
if (NumExpand >= ExpandLimit)
@@ -171,6 +230,12 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
return true;
}
+ // On Swift, we mostly care about hazards from multiplication instructions
+ // writing the accumulator and the pipelining of loop iterations by out-of-
+ // order execution.
+ if (isSwift)
+ return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
+
if (IgnoreStall.count(MI))
return false;
@@ -179,8 +244,8 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
// preserves the in-order retirement of the instructions.
// Look at the next few instructions, if *most* of them can cause hazards,
// then the scheduler can't *fix* this, we'd better break up the VMLA.
- unsigned Limit1 = isA9 ? 1 : 4;
- unsigned Limit2 = isA9 ? 1 : 4;
+ unsigned Limit1 = isLikeA9 ? 1 : 4;
+ unsigned Limit2 = isLikeA9 ? 1 : 4;
for (unsigned i = 1; i <= 4; ++i) {
int Idx = ((int)MIIdx - i + 4) % 4;
MachineInstr *NextMI = LastMIs[Idx];
@@ -316,7 +381,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
TRI = Fn.getTarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
- isA9 = STI->isCortexA9();
+ isLikeA9 = STI->isLikeA9() || STI->isSwift();
+ isSwift = STI->isSwift();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 3e48ed1..f069535 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
- ARMGenEDInfo.inc ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
+ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index 500e3de..fa5681f 100644
--- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index edd73c2..123ada6 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -281,7 +281,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
.addReg(ARM::R3, RegState::Kill);
AddDefaultPred(MIB);
- MIB->copyImplicitOps(&*MBBI);
+ MIB.copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
}
@@ -352,7 +352,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
- MIB->copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(&*MI);
MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 735b255..095736d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -15,8 +15,8 @@
#include "ARM.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index a39b722..57cc7d8 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -18,21 +18,21 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
extern cl::opt<bool> ReuseFrameIndexVals;
@@ -390,6 +390,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
@@ -417,7 +418,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset
MI.RemoveOperand(FrameRegIdx+1);
- MachineInstrBuilder MIB(&MI);
return true;
}
@@ -428,7 +428,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
- MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
.addImm(Offset / Scale));
} else {
@@ -457,7 +456,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
- MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
} else {
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
@@ -603,6 +601,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
while (!MI.getOperand(i).isFI()) {
++i;
@@ -719,8 +718,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Add predicate back if it's needed.
- if (MI.isPredicable()) {
- MachineInstrBuilder MIB(&MI);
+ if (MI.isPredicable())
AddDefaultPred(MIB);
- }
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d54aa93..97c254c 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -11,12 +11,12 @@
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumITs, "Number of IT blocks inserted");
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index e9e20dd..67e8ec7 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -51,7 +51,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
MachineBasicBlock *MBB = Tail->getParent();
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
if (!AFI->hasITBlocks()) {
- TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
return;
}
@@ -65,7 +65,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
--MBBI;
// Actually replace the tail.
- TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
// Fix up IT.
if (CC != ARMCC::AL) {
@@ -408,7 +408,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Remove offset and remaining explicit predicate operands.
do MI.RemoveOperand(FrameRegIdx+1);
while (MI.getNumOperands() > FrameRegIdx+1);
- MachineInstrBuilder MIB(&MI);
+ MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
AddDefaultPred(MIB);
return true;
}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 29a87d0..1a7a4d4 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -16,12 +16,12 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index f18f491..567bc05 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -9,19 +9,20 @@
#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
@@ -52,81 +53,82 @@ namespace {
unsigned PredCC2 : 2;
unsigned PartFlag : 1; // 16-bit instruction does partial flag update
unsigned Special : 1; // Needs to be dealt with specially
+ unsigned AvoidMovs : 1; // Avoid movs with shifter operand (for Swift)
};
static const ReduceEntry ReduceTable[] = {
- // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
- { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
- { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 },
- { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
- { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
- { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
- { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
- //FIXME: Disable CMN, as CCodes are backwards from compare expectations
- //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
- { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
- // FIXME: adr.n immediate offset must be multiple of 4.
- //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 },
- // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
- // likely to cause issue in the loop. As a size / performance workaround,
- // they are not marked as such.
- { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
- // FIXME: Do we need the 16-bit 'S' variant?
- { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 },
- { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 },
- { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 },
- { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 },
- { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
- { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
-
- // FIXME: Clean this up after splitting each Thumb load / store opcode
- // into multiple ones.
- { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 },
- { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
-
- { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 },
- // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
- { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 },
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
+ //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
+ // likely to cause issue in the loop. As a size / performance workaround,
+ // they are not marked as such.
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1,0 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
+ // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
};
class Thumb2SizeReduce : public MachineFunctionPass {
@@ -175,13 +177,22 @@ namespace {
bool LiveCPSR, MachineInstr *CPSRDef,
bool IsSelfLoop);
+ /// ReduceMI - Attempt to reduce MI, return true on success.
+ bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop);
+
/// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB);
+
+ bool OptimizeSize;
+ bool MinimizeSize;
};
char Thumb2SizeReduce::ID = 0;
}
Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+ OptimizeSize = MinimizeSize = false;
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -216,8 +227,8 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
bool FirstInSelfLoop) {
- // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
- if (!STI->avoidCPSRPartialUpdate())
+ // Disable the check for -Oz (aka OptimizeForSizeHarder).
+ if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
return false;
if (!Def)
@@ -578,7 +589,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// are prioritized, but the table assumes a unique entry for each
// source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry =
- { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
@@ -596,6 +607,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
// t2MUL is "special". The tied source operand is second, not first.
@@ -708,6 +725,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
unsigned Limit = ~0U;
if (Entry.Imm1Limit)
Limit = (1 << Entry.Imm1Limit) - 1;
@@ -841,6 +864,32 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
return LiveCPSR;
}
+bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI == ReduceOpcodeMap.end())
+ return false;
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+
+ // Don't attempt normal reductions on "special" cases for now.
+ if (Entry.Special)
+ return ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ return true;
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ return true;
+
+ return false;
+}
+
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -865,40 +914,20 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
- unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
- if (OPI != ReduceOpcodeMap.end()) {
- const ReduceEntry &Entry = ReduceTable[OPI->second];
- // Ignore "special" cases for now.
- if (Entry.Special) {
- if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- }
- goto ProcessNext;
- }
-
- // Try to transform to a 16-bit two-address instruction.
- if (Entry.NarrowOpc2 &&
- ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- goto ProcessNext;
- }
-
- // Try to transform to a 16-bit non-two-address instruction.
- if (Entry.NarrowOpc1 &&
- ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- }
+ // Does NextMII belong to the same bundle as MI?
+ bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
+
+ if (ReduceMI(MBB, MI, LiveCPSR, CPSRDef, IsSelfLoop)) {
+ Modified = true;
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
+ MI = &*I;
+ // Removing and reinserting the first instruction in a bundle will break
+ // up the bundle. Fix the bundling if it was broken.
+ if (NextInSameBundle && !NextMII->isBundledWithPred())
+ NextMII->bundleWithPred();
}
- ProcessNext:
- if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
+ if (!NextInSameBundle && MI->isInsideBundle()) {
// FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
// marker is only on the BUNDLE instruction. Process the BUNDLE
// instruction as we finish with the bundled instruction to work around
@@ -931,6 +960,13 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
STI = &TM.getSubtarget<ARMSubtarget>();
+ // Optimizing / minimizing size?
+ AttributeSet FnAttrs = MF.getFunction()->getAttributes();
+ OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+ MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize);
+
bool Modified = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
Modified |= ReduceMBB(*I);