Diffstat (limited to 'lib/Target/ARM')
76 files changed, 5782 insertions, 2511 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 2a1e8e4..5faf8c3 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -37,12 +37,16 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); +FunctionPass *createARMGlobalBaseRegPass(); FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); +/// \brief Creates an ARM-specific Target Transformation Info pass. +ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM); + void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 69e2346..a76715a 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -32,9 +32,6 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", "Enable VFP3 instructions", [FeatureVFP2]>; -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; @@ -44,10 +41,16 @@ def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", "Does not support ARM mode execution">; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3, FeatureFP16]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict VFP3 to 16 double registers">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasHardwareDivideInARM", "true", + "Enable divide instructions in ARM mode">; def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", @@ -86,6 +89,10 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; +def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", + "AvoidMOVsShifterOperand", "true", + "Avoid movs instructions with shifter operand">; + // Some processors perform return stack prediction. CodeGen should avoid issue // "normal" call instructions to callees which do not return. def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true", @@ -129,6 +136,11 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", include "ARMSchedule.td" // ARM processor families. 
+def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", + "Cortex-A5 ARM processors", + [FeatureSlowFPBrcc, FeatureNEONForFP, + FeatureHasSlowFPVMLx, FeatureVMLxForwarding, + FeatureT2XtPk]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureNEONForFP, @@ -139,6 +151,25 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", [FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, FeatureAvoidPartialCPSR]>; +def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", + "Swift ARM processors", + [FeatureNEONForFP, FeatureT2XtPk, + FeatureVFP4, FeatureMP, FeatureHWDiv, + FeatureHWDivARM, FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx]>; + +// FIXME: It has not been determined if A15 has these features. +def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", + "Cortex-A15 ARM processors", + [FeatureT2XtPk, FeatureFP16, + FeatureAvoidPartialCPSR]>; +def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", + "Cortex-R5 ARM processors", + [FeatureSlowFPBrcc, FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; class ProcNoItin<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; @@ -204,6 +235,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, FeatureDSPThumb2]>; // V7a Processors. +// FIXME: A5 has currently the same Schedule model as A8 +def : ProcessorModel<"cortex-a5", CortexA8Model, + [ProcA5, HasV7Ops, FeatureNEON, FeatureDB, + FeatureVFP4, FeatureDSPThumb2, + FeatureHasRAS]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; @@ -214,6 +250,15 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureMP, FeatureHasRAS]>; +// FIXME: A15 has currently the same ProcessorModel as A9. +def : ProcessorModel<"cortex-a15", CortexA9Model, + [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureHasRAS]>; +// FIXME: R5 has currently the same ProcessorModel as A8. +def : ProcessorModel<"cortex-r5", CortexA8Model, + [ProcR5, HasV7Ops, FeatureDB, + FeatureVFP3, FeatureDSPThumb2, + FeatureHasRAS]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [HasV7Ops, @@ -227,6 +272,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureMClass]>; +// Swift uArch Processors. 
+def : ProcessorModel<"swift", SwiftModel, + [ProcSwift, HasV7Ops, FeatureNEON, + FeatureDB, FeatureDSPThumb2, + FeatureHasRAS]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index e9e2803..fc6ac90 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,31 +23,33 @@ #include "InstPrinter/ARMInstPrinter.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/Module.h" -#include "llvm/Type.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/SmallString.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetMachine.h" #include <cctype> using namespace llvm; @@ -302,7 +304,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { } void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { - uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); assert(Size && "C++ constructor pointer had zero size!"); const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); @@ -389,16 +391,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, //===--------------------------------------------------------------------===// MCSymbol *ARMAsmPrinter:: -GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const { - SmallString<60> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << uid << '_' << uid2 - << "_set_" << MBB->getNumber(); - return OutContext.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { SmallString<60> Name; raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI" @@ -407,7 +399,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { } -MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const { +MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { SmallString<60> Name; raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); @@ -592,9 +584,24 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const 
TargetLoweringObjectFileMachO &>( getObjFileLowering()); - OutStreamer.SwitchSection(TLOFMacho.getTextSection()); - OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); - OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection()); + + // Collect the set of sections our functions will go into. + SetVector<const MCSection *, SmallVector<const MCSection *, 8>, + SmallPtrSet<const MCSection *, 8> > TextSections; + // Default text section comes first. + TextSections.insert(TLOFMacho.getTextSection()); + // Now any user defined text sections from function attributes. + for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F) + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) + TextSections.insert(TLOFMacho.SectionForGlobal(F, Mang, TM)); + // Now the coalescable sections. + TextSections.insert(TLOFMacho.getTextCoalSection()); + TextSections.insert(TLOFMacho.getConstTextCoalSection()); + + // Emit the sections in the .s file header to fix the order. + for (unsigned i = 0, e = TextSections.size(); i != e; ++i) + OutStreamer.SwitchSection(TextSections[i]); + if (RelocM == Reloc::DynamicNoPIC) { const MCSection *sect = OutContext.getMachOSection("__TEXT", "__symbol_stub4", @@ -743,13 +750,28 @@ void ARMAsmPrinter::emitAttributes() { AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); } else if (CPUString == "generic") { - // FIXME: Why these defaults? - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + // For a generic CPU, we assume a standard v7a architecture in Subtarget. + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, ARMBuildAttrs::Allowed); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::Allowed); - } + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV7Ops()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV6T2Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2); + else if (Subtarget->hasV6Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6); + else if (Subtarget->hasV5TEOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE); + else if (Subtarget->hasV5TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T); + else if (Subtarget->hasV4TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of @@ -893,7 +915,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) { void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { - int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType()); + int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); @@ -1030,12 +1052,10 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { OutContext); // If this isn't a TBB or TBH, the entries are direct branch instructions. 
if (OffsetWidth == 4) { - MCInst BrInst; - BrInst.setOpcode(ARM::t2B); - BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr)); - BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - BrInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(BrInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2B) + .addExpr(MBBSymbolExpr) + .addImm(ARMCC::AL) + .addReg(0)); continue; } // Otherwise it's an offset from the dispatch instruction. Construct an @@ -1079,28 +1099,6 @@ void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, printOperand(MI, NOps-2, OS); } -static void populateADROperands(MCInst &Inst, unsigned Dest, - const MCSymbol *Label, - unsigned pred, unsigned ccreg, - MCContext &Ctx) { - const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx); - Inst.addOperand(MCOperand::CreateReg(Dest)); - Inst.addOperand(MCOperand::CreateExpr(SymbolExpr)); - // Add predicate operands. - Inst.addOperand(MCOperand::CreateImm(pred)); - Inst.addOperand(MCOperand::CreateReg(ccreg)); -} - -void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI, - unsigned Opcode) { - MCInst TmpInst; - - // Emit the instruction as usual, just patch the opcode. - LowerARMMachineInstrToMCInst(MI, TmpInst, *this); - TmpInst.setOpcode(Opcode); - OutStreamer.EmitInstruction(TmpInst); -} - void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(MI->getFlag(MachineInstr::FrameSetup) && "Only instruction which are involved into frame setup code are allowed"); @@ -1277,154 +1275,104 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::tLEApcrel: case ARM::t2LEApcrel: { // FIXME: Need to also handle globals and externals - MCInst TmpInst; - TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR - : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR - : ARM::ADR)); - populateADROperands(TmpInst, MI->getOperand(0).getReg(), - GetCPISymbol(MI->getOperand(1).getIndex()), - MI->getOperand(2).getImm(), MI->getOperand(3).getReg(), - OutContext); - OutStreamer.EmitInstruction(TmpInst); + MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex()); + OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() == + ARM::t2LEApcrel ? ARM::t2ADR + : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR + : ARM::ADR)) + .addReg(MI->getOperand(0).getReg()) + .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext)) + // Add predicate operands. + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg())); return; } case ARM::LEApcrelJT: case ARM::tLEApcrelJT: case ARM::t2LEApcrelJT: { - MCInst TmpInst; - TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR - : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR - : ARM::ADR)); - populateADROperands(TmpInst, MI->getOperand(0).getReg(), - GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(), - MI->getOperand(2).getImm()), - MI->getOperand(3).getImm(), MI->getOperand(4).getReg(), - OutContext); - OutStreamer.EmitInstruction(TmpInst); + MCSymbol *JTIPICSymbol = + GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(), + MI->getOperand(2).getImm()); + OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() == + ARM::t2LEApcrelJT ? ARM::t2ADR + : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR + : ARM::ADR)) + .addReg(MI->getOperand(0).getReg()) + .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext)) + // Add predicate operands. 
+ .addImm(MI->getOperand(3).getImm()) + .addReg(MI->getOperand(4).getReg())); return; } // Darwin call instructions are just normal call instructions with different // clobber semantics (they clobber R9). case ARM::BX_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + .addReg(ARM::LR) + .addReg(ARM::PC) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::BX); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - OutStreamer.EmitInstruction(TmpInst); - } + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX) + .addReg(MI->getOperand(0).getReg())); return; } case ARM::tBX_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + .addReg(ARM::LR) + .addReg(ARM::PC) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tBX); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX) + .addReg(MI->getOperand(0).getReg()) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::BMOVPCRX_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + .addReg(ARM::LR) + .addReg(ARM::PC) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + .addReg(ARM::PC) + .addImm(MI->getOperand(0).getReg()) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addReg(0)); return; } case ARM::BMOVPCB_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + .addReg(ARM::LR) + .addReg(ARM::PC) // Add predicate operands. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::Bcc); - const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = Mang->getSymbol(GV); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - return; - } - case ARM::t2BMOVPCB_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::t2B); - const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = Mang->getSymbol(GV); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr)); + .addReg(0)); + + const GlobalValue *GV = MI->getOperand(0).getGlobal(); + MCSymbol *GVSym = Mang->getSymbol(GV); + const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc) + .addExpr(GVSymExpr) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::MOVi16_ga_pcrel: @@ -1512,15 +1460,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutContext)); // Form and emit the add. - MCInst AddInst; - AddInst.setOpcode(ARM::tADDhirr); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); - // Add predicate operands. - AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - AddInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(AddInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDhirr) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) + .addReg(ARM::PC) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::PICADD: { @@ -1535,17 +1481,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutContext)); // Form and emit the add. - MCInst AddInst; - AddInst.setOpcode(ARM::ADDrr); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - // Add predicate operands. - AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg())); - // Add 's' bit operand (always reg0 for this) - AddInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(AddInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr) + .addReg(MI->getOperand(0).getReg()) + .addReg(ARM::PC) + .addReg(MI->getOperand(1).getReg()) + // Add predicate operands. 
+ .addImm(MI->getOperand(3).getImm()) + .addReg(MI->getOperand(4).getReg()) + // Add 's' bit operand (always reg0 for this) + .addReg(0)); return; } case ARM::PICSTR: @@ -1581,16 +1525,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::PICLDRSB: Opcode = ARM::LDRSB; break; case ARM::PICLDRSH: Opcode = ARM::LDRSH; break; } - MCInst LdStInst; - LdStInst.setOpcode(Opcode); - LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - LdStInst.addOperand(MCOperand::CreateReg(ARM::PC)); - LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - LdStInst.addOperand(MCOperand::CreateImm(0)); - // Add predicate operands. - LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg())); - OutStreamer.EmitInstruction(LdStInst); + OutStreamer.EmitInstruction(MCInstBuilder(Opcode) + .addReg(MI->getOperand(0).getReg()) + .addReg(ARM::PC) + .addReg(MI->getOperand(1).getReg()) + .addImm(0) + // Add predicate operands. + .addImm(MI->getOperand(3).getImm()) + .addReg(MI->getOperand(4).getReg())); return; } @@ -1620,29 +1562,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case ARM::t2BR_JT: { // Lower and emit the instruction itself, then the jump table following it. - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + .addReg(ARM::PC) + .addReg(MI->getOperand(0).getReg()) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + // Output the data for the jump table itself EmitJump2Table(MI); return; } case ARM::t2TBB_JT: { // Lower and emit the instruction itself, then the jump table following it. - MCInst TmpInst; + OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBB) + .addReg(ARM::PC) + .addReg(MI->getOperand(0).getReg()) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); - TmpInst.setOpcode(ARM::t2TBB); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); // Output the data for the jump table itself EmitJump2Table(MI); // Make sure the next instruction is 2-byte aligned. @@ -1651,15 +1590,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case ARM::t2TBH_JT: { // Lower and emit the instruction itself, then the jump table following it. - MCInst TmpInst; + OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBH) + .addReg(ARM::PC) + .addReg(MI->getOperand(0).getReg()) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); - TmpInst.setOpcode(ARM::t2TBH); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - // Add predicate operands. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); // Output the data for the jump table itself EmitJump2Table(MI); return; @@ -1719,17 +1656,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::BR_JTadd: { // Lower and emit the instruction itself, then the jump table following it. // add pc, target, idx - MCInst TmpInst; - TmpInst.setOpcode(ARM::ADDrr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr) + .addReg(ARM::PC) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0) + // Add 's' bit operand (always reg0 for this) + .addReg(0)); // Output the data for the jump table itself EmitJumpTable(MI); @@ -1773,75 +1708,57 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ValReg = MI->getOperand(1).getReg(); MCSymbol *Label = GetARMSJLJEHLabel(); - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.AddComment("eh_setjmp begin"); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + .addReg(ValReg) + .addReg(ARM::PC) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("eh_setjmp begin"); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tADDi3); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDi3) + .addReg(ValReg) // 's' bit operand - TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateImm(7)); + .addReg(ARM::CPSR) + .addReg(ValReg) + .addImm(7) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tSTRi); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tSTRi) + .addReg(ValReg) + .addReg(SrcReg) // The offset immediate is #4. The operand value is scaled by 4 for the // tSTR instruction. - TmpInst.addOperand(MCOperand::CreateImm(1)); + .addImm(1) // Predicate. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVi8); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8) + .addReg(ARM::R0) + .addReg(ARM::CPSR) + .addImm(0) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext); - MCInst TmpInst; - TmpInst.setOpcode(ARM::tB); - TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr)); - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVi8); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); - TmpInst.addOperand(MCOperand::CreateImm(1)); + .addImm(ARMCC::AL) + .addReg(0)); + + const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tB) + .addExpr(SymbolExpr) + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.AddComment("eh_setjmp end"); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8) + .addReg(ARM::R0) + .addReg(ARM::CPSR) + .addImm(1) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("eh_setjmp end"); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); + OutStreamer.EmitLabel(Label); return; } @@ -1857,69 +1774,53 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ValReg = MI->getOperand(1).getReg(); - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::ADDri); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateImm(8)); + OutStreamer.AddComment("eh_setjmp begin"); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri) + .addReg(ValReg) + .addReg(ARM::PC) + .addImm(8) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // 's' bit operand (always reg0 for this). - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("eh_setjmp begin"); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::STRi12); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(4)); + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::STRi12) + .addReg(ValReg) + .addReg(SrcReg) + .addImm(4) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVi); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi) + .addReg(ARM::R0) + .addImm(0) // Predicate. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // 's' bit operand (always reg0 for this). - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::ADDri); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri) + .addReg(ARM::PC) + .addReg(ARM::PC) + .addImm(0) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // 's' bit operand (always reg0 for this). - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVi); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); - TmpInst.addOperand(MCOperand::CreateImm(1)); + .addReg(0)); + + OutStreamer.AddComment("eh_setjmp end"); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi) + .addReg(ARM::R0) + .addImm(1) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // 's' bit operand (always reg0 for this). - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("eh_setjmp end"); - OutStreamer.EmitInstruction(TmpInst); - } + .addReg(0)); return; } case ARM::Int_eh_sjlj_longjmp: { @@ -1929,48 +1830,35 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // bx $scratch unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ScratchReg = MI->getOperand(1).getReg(); - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::LDRi12); - TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(8)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12) + .addReg(ARM::SP) + .addReg(SrcReg) + .addImm(8) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::LDRi12); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(4)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12) + .addReg(ScratchReg) + .addReg(SrcReg) + .addImm(4) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::LDRi12); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12) + .addReg(ARM::R7) + .addReg(SrcReg) + .addImm(0) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::BX); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX) + .addReg(ScratchReg) // Predicate. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::tInt_eh_sjlj_longjmp: { @@ -1981,60 +1869,44 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // bx $scratch unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ScratchReg = MI->getOperand(1).getReg(); - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tLDRi); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi) + .addReg(ScratchReg) + .addReg(SrcReg) // The offset immediate is #8. The operand value is scaled by 4 for the // tLDR instruction. - TmpInst.addOperand(MCOperand::CreateImm(2)); + .addImm(2) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + .addReg(ARM::SP) + .addReg(ScratchReg) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tLDRi); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(1)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi) + .addReg(ScratchReg) + .addReg(SrcReg) + .addImm(1) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tLDRi); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi) + .addReg(ARM::R7) + .addReg(SrcReg) + .addImm(0) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tBX); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX) + .addReg(ScratchReg) // Predicate. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); return; } } diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 3555e8f..f7392fb 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -53,7 +53,7 @@ public: Subtarget = &TM.getSubtarget<ARMSubtarget>(); } - virtual const char *getPassName() const { + virtual const char *getPassName() const LLVM_OVERRIDE { return "ARM Assembly Printer"; } @@ -62,22 +62,24 @@ public: virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) LLVM_OVERRIDE; virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O); + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) LLVM_OVERRIDE; void EmitJumpTable(const MachineInstr *MI); void EmitJump2Table(const MachineInstr *MI); - virtual void EmitInstruction(const MachineInstr *MI); - bool runOnMachineFunction(MachineFunction &F); + virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; + virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE; - virtual void EmitConstantPool() {} // we emit constant pools customly! - virtual void EmitFunctionBodyEnd(); - virtual void EmitFunctionEntryLabel(); - void EmitStartOfAsmFile(Module &M); - void EmitEndOfAsmFile(Module &M); - void EmitXXStructor(const Constant *CV); + virtual void EmitConstantPool() LLVM_OVERRIDE { + // we emit constant pools customly! + } + virtual void EmitFunctionBodyEnd() LLVM_OVERRIDE; + virtual void EmitFunctionEntryLabel() LLVM_OVERRIDE; + virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE; + virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE; + virtual void EmitXXStructor(const Constant *CV) LLVM_OVERRIDE; // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); @@ -101,12 +103,13 @@ private: public: void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + virtual MachineLocation + getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE; /// EmitDwarfRegOp - Emit dwarf register operation. - virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const; + virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE; - virtual unsigned getISAEncoding() { + virtual unsigned getISAEncoding() LLVM_OVERRIDE { // ARM/Darwin adds ISA to the DWARF info for each function. if (!Subtarget->isTargetDarwin()) return 0; @@ -114,18 +117,19 @@ public: ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; } +private: MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); - MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const; MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; - MCSymbol *GetARMSJLJEHLabel(void) const; + MCSymbol *GetARMSJLJEHLabel() const; MCSymbol *GetARMGVSymbol(const GlobalValue *GV); +public: /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. 
- virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); + virtual void + EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) LLVM_OVERRIDE; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index e2f0d7d..0076910 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -18,9 +18,7 @@ #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -29,12 +27,14 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/STLExtras.h" #define GET_INSTRINFO_CTOR #include "ARMGenInstrInfo.inc" @@ -49,6 +49,11 @@ static cl::opt<bool> WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), cl::desc("Widen ARM vmovs to vmovd when possible")); +static cl::opt<unsigned> +SwiftPartialUpdateClearance("swift-partial-update-clearance", + cl::Hidden, cl::init(12), + cl::desc("Clearance before partial register updates")); + /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { uint16_t MLxOpc; // MLA / MLS opcode @@ -101,7 +106,7 @@ CreateTargetHazardRecognizer(const TargetMachine *TM, const InstrItineraryData *II = TM->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); } - return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); + return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); } ScheduleHazardRecognizer *ARMBaseInstrInfo:: @@ -110,7 +115,7 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, if (Subtarget.isThumb2() || Subtarget.hasVFP2()) return (ScheduleHazardRecognizer *) new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); - return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); + return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } MachineInstr * @@ -459,8 +464,9 @@ PredicateInstruction(MachineInstr *MI, unsigned Opc = MI->getOpcode(); if (isUncondBranchOpcode(Opc)) { MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); - MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); - MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); return true; } @@ -697,6 +703,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; + else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) + Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2; else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; @@ -786,6 +794,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, 
MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); } else llvm_unreachable("Unknown reg class!"); break; @@ -933,6 +948,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = @@ -958,6 +974,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (ARM::DPRRegClass.hasSubClassEq(RC)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA; + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); } else llvm_unreachable("Unknown reg class!"); break; @@ -1130,6 +1155,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // All clear, widen the COPY. DEBUG(dbgs() << "widening: " << *MI); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg // or some other super-register. @@ -1141,14 +1167,14 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ MI->setDesc(get(ARM::VMOVD)); MI->getOperand(0).setReg(DstRegD); MI->getOperand(1).setReg(SrcRegD); - AddDefaultPred(MachineInstrBuilder(MI)); + AddDefaultPred(MIB); // We are now reading SrcRegD instead of SrcRegS. This may upset the // register scavenger and machine verifier, so we need to indicate that we // are reading an undefined value from SrcRegD, but a proper value from // SrcRegS. MI->getOperand(1).setIsUndef(); - MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit); + MIB.addReg(SrcRegS, RegState::Implicit); // SrcRegD may actually contain an unrelated value in the ssub_1 // sub-register. Don't kill it. Only kill the ssub_0 sub-register. @@ -1245,7 +1271,7 @@ reMaterialize(MachineBasicBlock &MBB, MachineInstr * ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { - MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF); + MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF); switch(Orig->getOpcode()) { case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { @@ -1349,6 +1375,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, /// only return true if the base pointers are the same and the only differences /// between the two addresses is the offset. It also returns the offsets by /// reference. +/// +/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched +/// is permanently disabled. 
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const { @@ -1389,7 +1418,6 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: - case ARM::t2LDRDi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: case ARM::t2LDRSHi12: @@ -1424,6 +1452,9 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, /// from the common base address. It returns true if it decides it's desirable /// to schedule the two loads together. "NumLoads" is the number of loads that /// have already been scheduled after Load1. +/// +/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched +/// is permanently disabled. bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const { @@ -1528,6 +1559,14 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; } +bool +ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + // Reduce false anti-dependencies to let Swift's out-of-order execution + // engine do its thing. + return Subtarget.isSwift(); +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. @@ -1567,7 +1606,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) return NULL; - MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstruction(MI, NewMI); if (!MI) return NULL; // After swapping the MOVCC operands, also invert the condition. @@ -1576,7 +1615,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { return MI; } } - return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstruction(MI, NewMI); } /// Identify instructions that can be folded into a MOVCC instruction, and @@ -1679,7 +1718,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, // same register as operand 0. MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); FalseReg.setImplicit(); - NewMI->addOperand(FalseReg); + NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); // The caller will erase MI, but not DefMI. 
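Editor's note on the PredicateInstruction and expandPostRAPseudo hunks above: the patch moves from the old single-argument MachineInstrBuilder(MI) to the two-argument MachineInstrBuilder(MF, MI), which wraps an instruction that is already inserted in a function; passing the MachineFunction presumably lets the builder keep the function's register use lists consistent as register operands are appended. A minimal sketch of the new idiom, with the helper name invented for illustration:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"

    using namespace llvm;

    // Hypothetical helper: append a predicate (condition code + condition
    // register) to an instruction already inserted into a basic block.
    static void appendPredicate(MachineInstr *MI, int64_t CondCode,
                                unsigned CondReg) {
      MachineFunction &MF = *MI->getParent()->getParent();
      MachineInstrBuilder(MF, MI) // two-arg form used throughout this patch
          .addImm(CondCode)       // e.g. ARMCC::AL
          .addReg(CondReg);       // 0 when unpredicated
    }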
@@ -2342,6 +2381,260 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, return true; } +static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, + const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: { + const MCInstrDesc &Desc = MI->getDesc(); + int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); + assert(UOps >= 0 && "bad # UOps"); + return UOps; + } + + case ARM::LDRrs: + case ARM::LDRBrs: + case ARM::STRrs: + case ARM::STRBrs: { + unsigned ShOpVal = MI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 1; + return 2; + } + + case ARM::LDRH: + case ARM::STRH: { + if (!MI->getOperand(2).getReg()) + return 1; + + unsigned ShOpVal = MI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 1; + return 2; + } + + case ARM::LDRSB: + case ARM::LDRSH: + return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2; + + case ARM::LDRSB_POST: + case ARM::LDRSH_POST: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + return (Rt == Rm) ? 4 : 3; + } + + case ARM::LDR_PRE_REG: + case ARM::LDRB_PRE_REG: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (Rt == Rm) + return 3; + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 2; + return 3; + } + + case ARM::STR_PRE_REG: + case ARM::STRB_PRE_REG: { + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 2; + return 3; + } + + case ARM::LDRH_PRE: + case ARM::STRH_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (!Rm) + return 2; + if (Rt == Rm) + return 3; + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) + ? 3 : 2; + } + + case ARM::LDR_POST_REG: + case ARM::LDRB_POST_REG: + case ARM::LDRH_POST: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + return (Rt == Rm) ? 
3 : 2; + } + + case ARM::LDR_PRE_IMM: + case ARM::LDRB_PRE_IMM: + case ARM::LDR_POST_IMM: + case ARM::LDRB_POST_IMM: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: + case ARM::STRB_PRE_IMM: + case ARM::STRH_POST: + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: + case ARM::STR_PRE_IMM: + return 2; + + case ARM::LDRSB_PRE: + case ARM::LDRSH_PRE: { + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm == 0) + return 3; + unsigned Rt = MI->getOperand(0).getReg(); + if (Rt == Rm) + return 4; + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 3; + return 4; + } + + case ARM::LDRD: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(2).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return (Rt == Rn) ? 3 : 2; + } + + case ARM::STRD: { + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return 2; + } + + case ARM::LDRD_POST: + case ARM::t2LDRD_POST: + return 3; + + case ARM::STRD_POST: + case ARM::t2STRD_POST: + return 4; + + case ARM::LDRD_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(3).getReg(); + unsigned Rm = MI->getOperand(4).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return (Rt == Rn) ? 4 : 3; + } + + case ARM::t2LDRD_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(3).getReg(); + return (Rt == Rn) ? 4 : 3; + } + + case ARM::STRD_PRE: { + unsigned Rm = MI->getOperand(4).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return 3; + } + + case ARM::t2STRD_PRE: + return 3; + + case ARM::t2LDR_POST: + case ARM::t2LDRB_POST: + case ARM::t2LDRB_PRE: + case ARM::t2LDRSBi12: + case ARM::t2LDRSBi8: + case ARM::t2LDRSBpci: + case ARM::t2LDRSBs: + case ARM::t2LDRH_POST: + case ARM::t2LDRH_PRE: + case ARM::t2LDRSBT: + case ARM::t2LDRSB_POST: + case ARM::t2LDRSB_PRE: + case ARM::t2LDRSH_POST: + case ARM::t2LDRSH_PRE: + case ARM::t2LDRSHi12: + case ARM::t2LDRSHi8: + case ARM::t2LDRSHpci: + case ARM::t2LDRSHs: + return 2; + + case ARM::t2LDRDi8: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(2).getReg(); + return (Rt == Rn) ? 3 : 2; + } + + case ARM::t2STRB_POST: + case ARM::t2STRB_PRE: + case ARM::t2STRBs: + case ARM::t2STRDi8: + case ARM::t2STRH_POST: + case ARM::t2STRH_PRE: + case ARM::t2STRHs: + case ARM::t2STR_POST: + case ARM::t2STR_PRE: + case ARM::t2STRs: + return 2; + } +} + +// Return the number of 32-bit words loaded by LDM or stored by STM. If this +// can't be easily determined return 0 (missing MachineMemOperand). +// +// FIXME: The current MachineInstr design does not support relying on machine +// mem operands to determine the width of a memory access. Instead, we expect +// the target to provide this information based on the instruction opcode and +// operands. However, using MachineMemOperand is a the best solution now for +// two reasons: +// +// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI +// operands. 
This is much more dangerous than using the MachineMemOperand +// sizes because CodeGen passes can insert/remove optional machine operands. In +// fact, it's totally incorrect for preRA passes and appears to be wrong for +// postRA passes as well. +// +// 2) getNumLDMAddresses is only used by the scheduling machine model and any +// machine model that calls this should handle the unknown (zero size) case. +// +// Long term, we should require a target hook that verifies MachineMemOperand +// sizes during MC lowering. That target hook should be local to MC lowering +// because we can't ensure that it is aware of other MI forms. Doing this will +// ensure that MachineMemOperands are correctly propagated through all passes. +unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const { + unsigned Size = 0; + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) { + Size += (*I)->getSize(); + } + return Size / 4; +} + unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const { @@ -2351,8 +2644,12 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); int ItinUOps = ItinData->getNumMicroOps(Class); - if (ItinUOps >= 0) + if (ItinUOps >= 0) { + if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) + return getNumMicroOpsSwiftLdSt(ItinData, MI); + return ItinUOps; + } unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2421,7 +2718,43 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; - if (Subtarget.isCortexA8()) { + if (Subtarget.isSwift()) { + // rdar://8402126 + int UOps = 1 + NumRegs; // One for address computation, one for each ld / st. + switch (Opc) { + default: break; + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: + ++UOps; // One for base register writeback. + break; + case ARM::LDMIA_RET: + case ARM::tPOP_RET: + case ARM::t2LDMIA_RET: + UOps += 2; // One for base reg wb, one for write to pc. + break; + } + return UOps; + } else if (Subtarget.isCortexA8()) { if (NumRegs < 4) return 2; // 4 registers would be issued: 2, 2. @@ -2430,7 +2763,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, if (NumRegs % 2) ++A8UOps; return A8UOps; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. 
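The Swift LDM/STM handling in the hunk above reduces to simple micro-op arithmetic: one op for the address computation, one per register transferred, plus one for base-register writeback on the _UPD forms and two (writeback plus a write to the pc) on the return forms. A minimal standalone sketch of that rule, with hypothetical helper names that are not part of the patch:

#include <cassert>

// Illustrative mirror of the Swift LDM/STM micro-op rule in the hunk above.
// "Writeback" covers the _UPD forms; "WritebackReturn" covers LDMIA_RET,
// tPOP_RET and t2LDMIA_RET.
enum class LdStMultiKind { Plain, Writeback, WritebackReturn };

static unsigned swiftLdStMultiUOps(unsigned NumRegs, LdStMultiKind Kind) {
  unsigned UOps = 1 + NumRegs;  // address computation + one per register
  if (Kind == LdStMultiKind::Writeback)
    UOps += 1;                  // base register writeback
  else if (Kind == LdStMultiKind::WritebackReturn)
    UOps += 2;                  // base reg writeback + write to pc
  return UOps;
}

int main() {
  // "ldmia r0!, {r1-r4}": 1 (address) + 4 (registers) + 1 (writeback) = 6.
  assert(swiftLdStMultiUOps(4, LdStMultiKind::Writeback) == 6);
  // "pop {r4-r7, pc}" with 5 list registers: 1 + 5 + 2 = 8 on this model.
  assert(swiftLdStMultiUOps(5, LdStMultiKind::WritebackReturn) == 8);
  return 0;
}

Note that the patch derives NumRegs from MI->getNumOperands() - Desc.getNumOperands() + 1 rather than taking it as a parameter; the sketch models only the arithmetic.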
@@ -2463,7 +2796,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = RegNo / 2 + 1; if (RegNo % 2) ++DefCycle; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { DefCycle = RegNo; bool isSLoad = false; @@ -2507,7 +2840,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = 1; // Result latency is issue cycle + 2: E2. DefCycle += 2; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { DefCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2538,7 +2871,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = RegNo / 2 + 1; if (RegNo % 2) ++UseCycle; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { UseCycle = RegNo; bool isSStore = false; @@ -2579,7 +2912,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = 2; // Read in E3. UseCycle += 2; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { UseCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2764,7 +3097,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr *DefMI, const MCInstrDesc *DefMCID, unsigned DefAlign) { int Adjust = 0; - if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) { + if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID->getOpcode()) { @@ -2789,9 +3122,40 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, break; } } + } else if (Subtarget.isSwift()) { + // FIXME: Properly handle all of the latency adjustments for address + // writeback. + switch (DefMCID->getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = DefMI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + Adjust -= 2; + else if (!isSub && + ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) + --Adjust; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl only. + unsigned ShAmt = DefMI->getOperand(3).getImm(); + if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) + Adjust -= 2; + break; + } + } } - if (DefAlign < 8 && Subtarget.isCortexA9()) { + if (DefAlign < 8 && Subtarget.isLikeA9()) { switch (DefMCID->getOpcode()) { default: break; case ARM::VLD1q8: @@ -2949,7 +3313,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (Reg == ARM::CPSR) { if (DefMI->getOpcode() == ARM::FMSTAT) { // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) - return Subtarget.isCortexA9() ? 1 : 20; + return Subtarget.isLikeA9() ? 1 : 20; } // CPSR set and branch can be paired in the same cycle. @@ -2965,7 +3329,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). 
if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); - if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize)) --Latency; } return Latency; @@ -3015,7 +3381,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (!UseNode->isMachineOpcode()) { int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); - if (Subtarget.isCortexA9()) + if (Subtarget.isLikeA9() || Subtarget.isSwift()) return Latency <= 2 ? 1 : Latency - 1; else return Latency <= 3 ? 1 : Latency - 2; @@ -3032,7 +3398,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, UseMCID, UseIdx, UseAlign); if (Latency > 1 && - (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { + (Subtarget.isCortexA8() || Subtarget.isLikeA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID.getOpcode()) { @@ -3059,9 +3425,36 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; } } + } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { + // FIXME: Properly handle all of the latency adjustments for address + // writeback. + switch (DefMCID.getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = + cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) + Latency -= 2; + else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) + --Latency; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl 0-3 only. + Latency -= 2; + break; + } + } } - if (DefAlign < 8 && Subtarget.isCortexA9()) + if (DefAlign < 8 && Subtarget.isLikeA9()) switch (DefMCID.getOpcode()) { default: break; case ARM::VLD1q8: @@ -3185,18 +3578,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } -unsigned -ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *DepMI) const { - unsigned Reg = DefMI->getOperand(DefIdx).getReg(); - if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI)) - return 1; - - // If the second MI is predicated, then there is an implicit use dependency. - return getInstrLatency(ItinData, DefMI); -} - unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -3354,9 +3735,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); - // Cortex-A9 is particularly picky about mixing the two and wants these + // A9-like cores are particularly picky about mixing the two and want these // converted. 
-  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+  if (Subtarget.isLikeA9() && !isPredicated(MI) &&
       (MI->getOpcode() == ARM::VMOVRS ||
        MI->getOpcode() == ARM::VMOVSR ||
        MI->getOpcode() == ARM::VMOVS))
@@ -3394,12 +3775,54 @@ static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
   return DReg;
 }
 
+/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
+/// set ImplicitSReg to the SPR that must be marked as an implicit-use, or
+/// to zero if no implicit-use needs to be added.
+///
+/// If the function cannot determine whether an SPR should be marked as an
+/// implicit-use, it returns false.
+///
+/// This function handles cases where an instruction is being modified from
+/// taking an SPR operand to taking DPR[Lane]. A use of the DPR is being
+/// added, which may conflict with an earlier def of an SPR corresponding to
+/// DPR[Lane^1] (i.e. the other lane of the DPR).
+///
+/// If the other SPR is defined, an implicit-use of it should be added;
+/// otherwise (including when the DPR itself is defined), it should not.
+///
+static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
+                                       MachineInstr *MI,
+                                       unsigned DReg, unsigned Lane,
+                                       unsigned &ImplicitSReg) {
+  // If the DPR is defined or used already, the other SPR lane will be chained
+  // correctly, so there is nothing to be done.
+  if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
+    ImplicitSReg = 0;
+    return true;
+  }
+
+  // Otherwise we need to go searching to see if the SPR is set explicitly.
+  ImplicitSReg = TRI->getSubReg(DReg,
+                                (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
+  MachineBasicBlock::LivenessQueryResult LQR =
+    MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
+
+  if (LQR == MachineBasicBlock::LQR_Live)
+    return true;
+  else if (LQR == MachineBasicBlock::LQR_Unknown)
+    return false;
+
+  // If the register is known not to be live, there is no need to add an
+  // implicit-use.
+  ImplicitSReg = 0;
+  return true;
+}
 void
 ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
   unsigned DstReg, SrcReg, DReg;
   unsigned Lane;
-  MachineInstrBuilder MIB(MI);
+  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
   const TargetRegisterInfo *TRI = &getRegisterInfo();
   switch (MI->getOpcode()) {
     default:
@@ -3451,7 +3874,7 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       // was dead before here.
       MIB.addReg(SrcReg, RegState::Implicit);
       break;
-    case ARM::VMOVSR:
+    case ARM::VMOVSR: {
       if (Domain != ExeNEON)
         break;
       assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
@@ -3462,12 +3885,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
 
       DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
 
-      // If we insert both a novel <def> and an <undef> on the DReg, we break
-      // any existing dependency chain on the unused lane. Either already being
-      // present means this instruction is in that chain anyway so we can make
-      // the transformation.
-      if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI))
-        break;
+      unsigned ImplicitSReg;
+      if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
+        break;
 
       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
         MI->RemoveOperand(i-1);
@@ -3484,7 +3904,10 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       // The narrower destination must be marked as set to keep previous chains
       // in place.
MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + if (ImplicitSReg != 0) + MIB.addReg(ImplicitSReg, RegState::Implicit); break; + } case ARM::VMOVS: { if (Domain != ExeNEON) break; @@ -3497,12 +3920,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); - // If we insert both a novel <def> and an <undef> on the DReg, we break - // any existing dependency chain on the unused lane. Either already being - // present means this instruction is in that chain anyway so we can make - // the transformation. - if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI)) - break; + unsigned ImplicitSReg; + if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg)) + break; for (unsigned i = MI->getDesc().getNumOperands(); i; --i) MI->RemoveOperand(i-1); @@ -3520,6 +3940,8 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // more, so add them in manually. MIB.addReg(DstReg, RegState::Implicit | RegState::Define); MIB.addReg(SrcReg, RegState::Implicit); + if (ImplicitSReg != 0) + MIB.addReg(ImplicitSReg, RegState::Implicit); break; } @@ -3578,12 +4000,130 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // As before, the original destination is no longer represented, add it // implicitly. MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + if (ImplicitSReg != 0) + MIB.addReg(ImplicitSReg, RegState::Implicit); break; } } } +//===----------------------------------------------------------------------===// +// Partial register updates +//===----------------------------------------------------------------------===// +// +// Swift renames NEON registers with 64-bit granularity. That means any +// instruction writing an S-reg implicitly reads the containing D-reg. The +// problem is mostly avoided by translating f32 operations to v2f32 operations +// on D-registers, but f32 loads are still a problem. +// +// These instructions can load an f32 into a NEON register: +// +// VLDRS - Only writes S, partial D update. +// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. +// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. +// +// FCONSTD can be used as a dependency-breaking instruction. + + +unsigned ARMBaseInstrInfo:: +getPartialRegUpdateClearance(const MachineInstr *MI, + unsigned OpNum, + const TargetRegisterInfo *TRI) const { + // Only Swift has partial register update problems. + if (!SwiftPartialUpdateClearance || !Subtarget.isSwift()) + return 0; + + assert(TRI && "Need TRI instance"); + + const MachineOperand &MO = MI->getOperand(OpNum); + if (MO.readsReg()) + return 0; + unsigned Reg = MO.getReg(); + int UseOp = -1; + + switch(MI->getOpcode()) { + // Normal instructions writing only an S-register. + case ARM::VLDRS: + case ARM::FCONSTS: + case ARM::VMOVSR: + // rdar://problem/8791586 + case ARM::VMOVv8i8: + case ARM::VMOVv4i16: + case ARM::VMOVv2i32: + case ARM::VMOVv2f32: + case ARM::VMOVv1i64: + UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI); + break; + + // Explicitly reads the dependency. + case ARM::VLD1LNd32: + UseOp = 1; + break; + default: + return 0; + } + + // If this instruction actually reads a value from Reg, there is no unwanted + // dependency. + if (UseOp != -1 && MI->getOperand(UseOp).readsReg()) + return 0; + + // We must be able to clobber the whole D-reg. 
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    // Virtual register must be a foo:ssub_0<def,undef> operand.
+    if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
+      return 0;
+  } else if (ARM::SPRRegClass.contains(Reg)) {
+    // Physical register: MI must define the full D-reg.
+    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
+                                             &ARM::DPRRegClass);
+    if (!DReg || !MI->definesRegister(DReg, TRI))
+      return 0;
+  }
+
+  // MI has an unwanted D-register dependency.
+  // Avoid defs in the previous N instructions.
+  return SwiftPartialUpdateClearance;
+}
+
+// Break a partial register dependency after getPartialRegUpdateClearance
+// returned non-zero.
+void ARMBaseInstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI,
+                          unsigned OpNum,
+                          const TargetRegisterInfo *TRI) const {
+  assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
+  assert(TRI && "Need TRI instance");
+
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  unsigned Reg = MO.getReg();
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         "Can't break virtual register dependencies.");
+  unsigned DReg = Reg;
+
+  // If MI defines an S-reg, find the corresponding D super-register.
+  if (ARM::SPRRegClass.contains(Reg)) {
+    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
+    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
+  }
+
+  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
+  assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
+
+  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
+  // the full D-register by loading the same value to both lanes. The
+  // instruction is micro-coded with 2 uops, so don't do this until we can
+  // properly schedule micro-coded instructions. The dispatcher stalls cause
+  // too large regressions.
+
+  // Insert the dependency-breaking FCONSTD before MI.
+  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
+  AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                         get(ARM::FCONSTD), DReg).addImm(96));
+  MI->addRegisterKilled(DReg, TRI, true);
+}
+
 bool ARMBaseInstrInfo::hasNOP() const {
   return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
 }
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 92e5ee8..2698132 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -15,10 +15,10 @@
 #define ARMBASEINSTRUCTIONINFO_H
 
 #include "ARM.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "ARMGenInstrInfo.inc"
@@ -182,10 +182,13 @@ public:
   virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                          unsigned NumCycles,
                                          const BranchProbability
-                                         &Probability) const {
+                                           &Probability) const {
     return NumCycles == 1;
   }
 
+  virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+                                         MachineBasicBlock &FMBB) const;
+
   /// analyzeCompare - For a comparison instruction, return the source registers
   /// in SrcReg and SrcReg2 if having two register operands, and the value it
   /// compares against in CmpValue.
Return true if the comparison instruction
@@ -226,15 +229,18 @@ public:
                                   SDNode *DefNode, unsigned DefIdx,
                                   SDNode *UseNode, unsigned UseIdx) const;
 
-  virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
-                                    const MachineInstr *DefMI, unsigned DefIdx,
-                                    const MachineInstr *DepMI) const;
-
   /// VFP/NEON execution domains.
   std::pair<uint16_t, uint16_t>
   getExecutionDomain(const MachineInstr *MI) const;
   void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
 
+  unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned,
+                                        const TargetRegisterInfo*) const;
+  void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
+                                 const TargetRegisterInfo *TRI) const;
+
+  /// Get the number of addresses loaded by LDM or VLDM, or zero if unknown.
+  unsigned getNumLDMAddresses(const MachineInstr *MI) const;
+
 private:
   unsigned getInstBundleLength(const MachineInstr *MI) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 9deb96e..d2f6a33 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -18,44 +18,34 @@
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
 
 #define GET_REGINFO_TARGET_DESC
 #include "ARMGenRegisterInfo.inc"
 
 using namespace llvm;
 
-static cl::opt<bool>
-ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
-          cl::desc("Force use of virtual base registers for stack load/store"));
-static cl::opt<bool>
-EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
-          cl::desc("Enable pre-regalloc stack frame index allocation"));
-static cl::opt<bool>
-EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
-          cl::desc("Enable use of a base pointer for complex stack frames"));
-
 ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
                                          const ARMSubtarget &sti)
-  : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti),
+  : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti),
     FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
     BasePtr(ARM::R6) {
 }
@@ -84,6 +74,11 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
   ?
CSR_iOS_RegMask : CSR_AAPCS_RegMask; } +const uint32_t* +ARMBaseRegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; +} + BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -106,148 +101,12 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned i = 0; i != 16; ++i) Reserved.set(ARM::D16 + i); } - return Reserved; -} + const TargetRegisterClass *RC = &ARM::GPRPairRegClass; + for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I) + for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI) + if (Reserved.test(*SI)) Reserved.set(*I); -bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, - unsigned Reg) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (Reg) { - default: break; - case ARM::SP: - case ARM::PC: - return true; - case ARM::R6: - if (hasBasePointer(MF)) - return true; - break; - case ARM::R7: - case ARM::R11: - if (FramePtr == Reg && TFI->hasFP(MF)) - return true; - break; - case ARM::R9: - return STI.isR9Reserved(); - } - - return false; -} - -bool -ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &SubIndices, - unsigned &NewSubIdx) const { - - unsigned Size = RC->getSize() * 8; - if (Size < 6) - return 0; - - NewSubIdx = 0; // Whole register. - unsigned NumRegs = SubIndices.size(); - if (NumRegs == 8) { - // 8 D registers -> 1 QQQQ register. - return (Size == 512 && - SubIndices[0] == ARM::dsub_0 && - SubIndices[1] == ARM::dsub_1 && - SubIndices[2] == ARM::dsub_2 && - SubIndices[3] == ARM::dsub_3 && - SubIndices[4] == ARM::dsub_4 && - SubIndices[5] == ARM::dsub_5 && - SubIndices[6] == ARM::dsub_6 && - SubIndices[7] == ARM::dsub_7); - } else if (NumRegs == 4) { - if (SubIndices[0] == ARM::qsub_0) { - // 4 Q registers -> 1 QQQQ register. - return (Size == 512 && - SubIndices[1] == ARM::qsub_1 && - SubIndices[2] == ARM::qsub_2 && - SubIndices[3] == ARM::qsub_3); - } else if (SubIndices[0] == ARM::dsub_0) { - // 4 D registers -> 1 QQ register. - if (Size >= 256 && - SubIndices[1] == ARM::dsub_1 && - SubIndices[2] == ARM::dsub_2 && - SubIndices[3] == ARM::dsub_3) { - if (Size == 512) - NewSubIdx = ARM::qqsub_0; - return true; - } - } else if (SubIndices[0] == ARM::dsub_4) { - // 4 D registers -> 1 QQ register (2nd). - if (Size == 512 && - SubIndices[1] == ARM::dsub_5 && - SubIndices[2] == ARM::dsub_6 && - SubIndices[3] == ARM::dsub_7) { - NewSubIdx = ARM::qqsub_1; - return true; - } - } else if (SubIndices[0] == ARM::ssub_0) { - // 4 S registers -> 1 Q register. - if (Size >= 128 && - SubIndices[1] == ARM::ssub_1 && - SubIndices[2] == ARM::ssub_2 && - SubIndices[3] == ARM::ssub_3) { - if (Size >= 256) - NewSubIdx = ARM::qsub_0; - return true; - } - } - } else if (NumRegs == 2) { - if (SubIndices[0] == ARM::qsub_0) { - // 2 Q registers -> 1 QQ register. - if (Size >= 256 && SubIndices[1] == ARM::qsub_1) { - if (Size == 512) - NewSubIdx = ARM::qqsub_0; - return true; - } - } else if (SubIndices[0] == ARM::qsub_2) { - // 2 Q registers -> 1 QQ register (2nd). - if (Size == 512 && SubIndices[1] == ARM::qsub_3) { - NewSubIdx = ARM::qqsub_1; - return true; - } - } else if (SubIndices[0] == ARM::dsub_0) { - // 2 D registers -> 1 Q register. 
- if (Size >= 128 && SubIndices[1] == ARM::dsub_1) { - if (Size >= 256) - NewSubIdx = ARM::qsub_0; - return true; - } - } else if (SubIndices[0] == ARM::dsub_2) { - // 2 D registers -> 1 Q register (2nd). - if (Size >= 256 && SubIndices[1] == ARM::dsub_3) { - NewSubIdx = ARM::qsub_1; - return true; - } - } else if (SubIndices[0] == ARM::dsub_4) { - // 2 D registers -> 1 Q register (3rd). - if (Size == 512 && SubIndices[1] == ARM::dsub_5) { - NewSubIdx = ARM::qsub_2; - return true; - } - } else if (SubIndices[0] == ARM::dsub_6) { - // 2 D registers -> 1 Q register (3rd). - if (Size == 512 && SubIndices[1] == ARM::dsub_7) { - NewSubIdx = ARM::qsub_3; - return true; - } - } else if (SubIndices[0] == ARM::ssub_0) { - // 2 S registers -> 1 D register. - if (SubIndices[1] == ARM::ssub_1) { - if (Size >= 128) - NewSubIdx = ARM::dsub_0; - return true; - } - } else if (SubIndices[0] == ARM::ssub_2) { - // 2 S registers -> 1 D register (2nd). - if (Size >= 128 && SubIndices[1] == ARM::ssub_3) { - NewSubIdx = ARM::dsub_1; - return true; - } - } - } - return false; + return Reserved; } const TargetRegisterClass* @@ -263,6 +122,7 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) case ARM::QPRRegClassID: case ARM::QQPRRegClassID: case ARM::QQQQPRRegClassID: + case ARM::GPRPairRegClassID: return Super; } Super = *I++; @@ -303,154 +163,62 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -/// getRawAllocationOrder - Returns the register allocation order for a -/// specified register class with a target-dependent hint. -ArrayRef<uint16_t> -ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, - unsigned HintType, unsigned HintReg, - const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - // Alternative register allocation orders when favoring even / odd registers - // of register pairs. - - // No FP, R9 is available. - static const uint16_t GPREven1[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, - ARM::R9, ARM::R11 - }; - static const uint16_t GPROdd1[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, - ARM::R8, ARM::R10 - }; - - // FP is R7, R9 is available. - static const uint16_t GPREven2[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, - ARM::R9, ARM::R11 - }; - static const uint16_t GPROdd2[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, - ARM::R8, ARM::R10 - }; - - // FP is R11, R9 is available. - static const uint16_t GPREven3[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, - ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, - ARM::R9 - }; - static const uint16_t GPROdd3[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9, - ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7, - ARM::R8 - }; - - // No FP, R9 is not available. - static const uint16_t GPREven4[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8, - ARM::R11 - }; - static const uint16_t GPROdd4[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, - ARM::R10 - }; - - // FP is R7, R9 is not available. 
- static const uint16_t GPREven5[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8, - ARM::R11 - }; - static const uint16_t GPROdd5[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, - ARM::R10 - }; - - // FP is R11, R9 is not available. - static const uint16_t GPREven6[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, - ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 - }; - static const uint16_t GPROdd6[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R7, - ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 - }; - - // We only support even/odd hints for GPR and rGPR. - if (RC != &ARM::GPRRegClass && RC != &ARM::rGPRRegClass) - return RC->getRawAllocationOrder(MF); - - if (HintType == ARMRI::RegPairEven) { - if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) - // It's no longer possible to fulfill this hint. Return the default - // allocation order. - return RC->getRawAllocationOrder(MF); - - if (!TFI->hasFP(MF)) { - if (!STI.isR9Reserved()) - return makeArrayRef(GPREven1); - else - return makeArrayRef(GPREven4); - } else if (FramePtr == ARM::R7) { - if (!STI.isR9Reserved()) - return makeArrayRef(GPREven2); - else - return makeArrayRef(GPREven5); - } else { // FramePtr == ARM::R11 - if (!STI.isR9Reserved()) - return makeArrayRef(GPREven3); - else - return makeArrayRef(GPREven6); - } - } else if (HintType == ARMRI::RegPairOdd) { - if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) - // It's no longer possible to fulfill this hint. Return the default - // allocation order. - return RC->getRawAllocationOrder(MF); - - if (!TFI->hasFP(MF)) { - if (!STI.isR9Reserved()) - return makeArrayRef(GPROdd1); - else - return makeArrayRef(GPROdd4); - } else if (FramePtr == ARM::R7) { - if (!STI.isR9Reserved()) - return makeArrayRef(GPROdd2); - else - return makeArrayRef(GPROdd5); - } else { // FramePtr == ARM::R11 - if (!STI.isR9Reserved()) - return makeArrayRef(GPROdd3); - else - return makeArrayRef(GPROdd6); - } - } - return RC->getRawAllocationOrder(MF); +// Get the other register in a GPRPair. +static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) { + for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers) + if (ARM::GPRPairRegClass.contains(*Supers)) + return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0); + return 0; } -/// ResolveRegAllocHint - Resolves the specified register allocation hint -/// to a physical register. Returns the physical register if it is successful. -unsigned -ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, - const MachineFunction &MF) const { - if (Reg == 0 || !isPhysicalRegister(Reg)) - return 0; - if (Type == 0) - return Reg; - else if (Type == (unsigned)ARMRI::RegPairOdd) - // Odd register. - return getRegisterPairOdd(Reg, MF); - else if (Type == (unsigned)ARMRI::RegPairEven) - // Even register. - return getRegisterPairEven(Reg, MF); - return 0; +// Resolve the RegPairEven / RegPairOdd register allocator hints. 
+void +ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, + ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg); + + unsigned Odd; + switch (Hint.first) { + case ARMRI::RegPairEven: + Odd = 0; + break; + case ARMRI::RegPairOdd: + Odd = 1; + break; + default: + TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); + return; + } + + // This register should preferably be even (Odd == 0) or odd (Odd == 1). + // Check if the other part of the pair has already been assigned, and provide + // the paired register as the first hint. + unsigned PairedPhys = 0; + if (VRM && VRM->hasPhys(Hint.second)) { + PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this); + if (PairedPhys && MRI.isReserved(PairedPhys)) + PairedPhys = 0; + } + + // First prefer the paired physreg. + if (PairedPhys) + Hints.push_back(PairedPhys); + + // Then prefer even or odd registers. + for (unsigned I = 0, E = Order.size(); I != E; ++I) { + unsigned Reg = Order[I]; + if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd) + continue; + // Don't provide hints that are paired to a reserved register. + unsigned Paired = getPairedGPR(Reg, !Odd, this); + if (!Paired || MRI.isReserved(Paired)) + continue; + Hints.push_back(Reg); + } } void @@ -476,7 +244,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, bool ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { // CortexA9 has a Write-after-write hazard for NEON registers. - if (!STI.isCortexA9()) + if (!STI.isLikeA9()) return false; switch (RC->getID()) { @@ -501,9 +269,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - if (!EnableBasePointer) - return false; - // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the // emergency spill slot. @@ -549,8 +314,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // pointer adjustments around calls. if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) return true; - if (!EnableBasePointer) - return false; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. return MRI->canReserveReg(BasePtr); @@ -561,8 +324,10 @@ needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); return requiresRealignment && canRealignStack(MF); } @@ -593,112 +358,6 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); } -unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, - const MachineFunction &MF) const { - switch (Reg) { - default: break; - // Return 0 if either register of the pair is a special register. - // So no R12, etc. 
- case ARM::R1: return ARM::R0; - case ARM::R3: return ARM::R2; - case ARM::R5: return ARM::R4; - case ARM::R7: - return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) - ? 0 : ARM::R6; - case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; - case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; - - case ARM::S1: return ARM::S0; - case ARM::S3: return ARM::S2; - case ARM::S5: return ARM::S4; - case ARM::S7: return ARM::S6; - case ARM::S9: return ARM::S8; - case ARM::S11: return ARM::S10; - case ARM::S13: return ARM::S12; - case ARM::S15: return ARM::S14; - case ARM::S17: return ARM::S16; - case ARM::S19: return ARM::S18; - case ARM::S21: return ARM::S20; - case ARM::S23: return ARM::S22; - case ARM::S25: return ARM::S24; - case ARM::S27: return ARM::S26; - case ARM::S29: return ARM::S28; - case ARM::S31: return ARM::S30; - - case ARM::D1: return ARM::D0; - case ARM::D3: return ARM::D2; - case ARM::D5: return ARM::D4; - case ARM::D7: return ARM::D6; - case ARM::D9: return ARM::D8; - case ARM::D11: return ARM::D10; - case ARM::D13: return ARM::D12; - case ARM::D15: return ARM::D14; - case ARM::D17: return ARM::D16; - case ARM::D19: return ARM::D18; - case ARM::D21: return ARM::D20; - case ARM::D23: return ARM::D22; - case ARM::D25: return ARM::D24; - case ARM::D27: return ARM::D26; - case ARM::D29: return ARM::D28; - case ARM::D31: return ARM::D30; - } - - return 0; -} - -unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, - const MachineFunction &MF) const { - switch (Reg) { - default: break; - // Return 0 if either register of the pair is a special register. - // So no R12, etc. - case ARM::R0: return ARM::R1; - case ARM::R2: return ARM::R3; - case ARM::R4: return ARM::R5; - case ARM::R6: - return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) - ? 0 : ARM::R7; - case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; - case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11; - - case ARM::S0: return ARM::S1; - case ARM::S2: return ARM::S3; - case ARM::S4: return ARM::S5; - case ARM::S6: return ARM::S7; - case ARM::S8: return ARM::S9; - case ARM::S10: return ARM::S11; - case ARM::S12: return ARM::S13; - case ARM::S14: return ARM::S15; - case ARM::S16: return ARM::S17; - case ARM::S18: return ARM::S19; - case ARM::S20: return ARM::S21; - case ARM::S22: return ARM::S23; - case ARM::S24: return ARM::S25; - case ARM::S26: return ARM::S27; - case ARM::S28: return ARM::S29; - case ARM::S30: return ARM::S31; - - case ARM::D0: return ARM::D1; - case ARM::D2: return ARM::D3; - case ARM::D4: return ARM::D5; - case ARM::D6: return ARM::D7; - case ARM::D8: return ARM::D9; - case ARM::D10: return ARM::D11; - case ARM::D12: return ARM::D13; - case ARM::D14: return ARM::D15; - case ARM::D16: return ARM::D17; - case ARM::D18: return ARM::D19; - case ARM::D20: return ARM::D21; - case ARM::D22: return ARM::D23; - case ARM::D24: return ARM::D25; - case ARM::D26: return ARM::D27; - case ARM::D28: return ARM::D29; - case ARM::D30: return ARM::D31; - } - - return 0; -} - /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. 
void ARMBaseRegisterInfo:: @@ -738,7 +397,7 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: requiresVirtualBaseRegisters(const MachineFunction &MF) const { - return EnableLocalStackAlloc; + return true; } static void @@ -877,8 +536,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { case ARM::VLDRS: case ARM::VLDRD: case ARM::VSTRS: case ARM::VSTRD: case ARM::tSTRspi: case ARM::tLDRspi: - if (ForceAllBaseRegAlloc) - return true; break; default: return false; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index da29f7e..aaa56a9 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -96,19 +96,10 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; + const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; - /// canCombineSubRegIndices - Given a register class and a list of - /// subregister indices, return true if it's possible to combine the - /// subregister indices into one that corresponds to a larger - /// subregister. Return the new subregister index by reference. Note the - /// new index may be zero if the given subregisters can be combined to - /// form the whole register. - virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &SubIndices, - unsigned &NewSubIdx) const; - const TargetRegisterClass* getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; const TargetRegisterClass* @@ -120,12 +111,11 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const; - ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC, - unsigned HintType, unsigned HintReg, - const MachineFunction &MF) const; - - unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg, - const MachineFunction &MF) const; + void getRegAllocationHints(unsigned VirtReg, + ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM) const; void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const; @@ -170,8 +160,6 @@ public: unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods... 
- virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; @@ -186,11 +174,6 @@ public: virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - -private: - unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; - - unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 0bd1c3e..e6e8c3d 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -18,8 +18,8 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index bda1517..b378b96 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -190,6 +190,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ // Callee-saved register lists. //===----------------------------------------------------------------------===// +def CSR_NoRegs : CalleeSavedRegs<(add)>; + def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 89cbc84..95decfe 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -14,16 +14,13 @@ #define DEBUG_TYPE "jit" #include "ARM.h" -#include "ARMConstantPoolValue.h" #include "ARMBaseInstrInfo.h" +#include "ARMConstantPoolValue.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/PassManager.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -31,7 +28,10 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -47,7 +47,7 @@ namespace { class ARMCodeEmitter : public MachineFunctionPass { ARMJITInfo *JTI; const ARMBaseInstrInfo *II; - const TargetData *TD; + const DataLayout *TD; const ARMSubtarget *Subtarget; TargetMachine &TM; JITCodeEmitter &MCE; @@ -67,7 +67,7 @@ namespace { ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : MachineFunctionPass(ID), JTI(0), II((const ARMBaseInstrInfo *)tm.getInstrInfo()), - TD(tm.getTargetData()), TM(tm), + TD(tm.getDataLayout()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} @@ -392,12 +392,16 @@ FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, } bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - assert((MF.getTarget().getRelocationModel() != Reloc::Default || - MF.getTarget().getRelocationModel() != 
Reloc::Static) && + TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget()); + + assert((Target.getRelocationModel() != Reloc::Default || + Target.getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo(); - II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo(); - TD = MF.getTarget().getTargetData(); + + JTI = static_cast<ARMJITInfo*>(Target.getJITInfo()); + II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo()); + TD = Target.getDataLayout(); + Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index dd05f0c..70a25c2 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -16,23 +16,23 @@ #define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" #include "ARMMachineFunctionInfo.h" -#include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb2InstrInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" #include <algorithm> using namespace llvm; @@ -528,7 +528,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // identity mapping of CPI's to CPE's. 
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const TargetData &TD = *MF->getTarget().getTargetData(); + const DataLayout &TD = *MF->getTarget().getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index fa3226e..4e703ec 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -13,11 +13,11 @@ #include "ARMConstantPoolValue.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/Constant.h" -#include "llvm/Constants.h" -#include "llvm/GlobalValue.h" -#include "llvm/Type.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" #include <cstdlib> using namespace llvm; @@ -206,11 +206,7 @@ ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s, bool AddCurrentAddress) : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier, AddCurrentAddress), - S(strdup(s)) {} - -ARMConstantPoolSymbol::~ARMConstantPoolSymbol() { - free((void*)S); -} + S(s) {} ARMConstantPoolSymbol * ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, @@ -218,14 +214,6 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false); } -static bool CPV_streq(const char *S1, const char *S2) { - if (S1 == S2) - return true; - if (S1 && S2 && strcmp(S1, S2) == 0) - return true; - return false; -} - int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { unsigned AlignMask = Alignment - 1; @@ -238,7 +226,7 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV); if (!APS) continue; - if (CPV_streq(APS->S, S) && equals(APS)) + if (APS->S == S && equals(APS)) return i; } } @@ -248,12 +236,11 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) { const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV); - return ACPS && CPV_streq(ACPS->S, S) && - ARMConstantPoolValue::hasSameValue(ACPV); + return ACPS && ACPS->S == S && ARMConstantPoolValue::hasSameValue(ACPV); } void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) { - ID.AddPointer(S); + ID.AddString(S); ARMConstantPoolValue::addSelectionDAGCSEId(ID); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 6b98d44..93812fe 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -102,8 +102,6 @@ public: virtual void print(raw_ostream &O) const; void print(raw_ostream *O) const { if (O) print(*O); } void dump() const; - - static bool classof(const ARMConstantPoolValue *) { return true; } }; inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { @@ -158,25 +156,22 @@ public: static bool classof(const ARMConstantPoolValue *APV) { return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA(); } - static bool classof(const ARMConstantPoolConstant *) { return true; } }; /// ARMConstantPoolSymbol - ARM-specific constantpool values for external /// symbols. 
class ARMConstantPoolSymbol : public ARMConstantPoolValue { - const char *S; // ExtSymbol being loaded. + const std::string S; // ExtSymbol being loaded. ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id, unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, bool AddCurrentAddress); public: - ~ARMConstantPoolSymbol(); - static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s, unsigned ID, unsigned char PCAdj); - const char *getSymbol() const { return S; } + const char *getSymbol() const { return S.c_str(); } virtual int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment); @@ -192,7 +187,6 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isExtSymbol(); } - static bool classof(const ARMConstantPoolSymbol *) { return true; } }; /// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic @@ -225,7 +219,6 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isMachineBasicBlock(); } - static bool classof(const ARMConstantPoolMBB *) { return true; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp deleted file mode 100644 index f671317..0000000 --- a/lib/Target/ARM/ARMELFWriterInfo.cpp +++ /dev/null @@ -1,78 +0,0 @@ -//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the ARM backend. -// -//===----------------------------------------------------------------------===// - -#include "ARMELFWriterInfo.h" -#include "ARMRelocations.h" -#include "llvm/Function.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/ELF.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Implementation of the ARMELFWriterInfo class -//===----------------------------------------------------------------------===// - -ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM) - : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64, - TM.getTargetData()->isLittleEndian()) { -} - -ARMELFWriterInfo::~ARMELFWriterInfo() {} - -unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { - switch (MachineRelTy) { - case ARM::reloc_arm_absolute: - case ARM::reloc_arm_relative: - case ARM::reloc_arm_cp_entry: - case ARM::reloc_arm_vfp_cp_entry: - case ARM::reloc_arm_machine_cp_entry: - case ARM::reloc_arm_jt_base: - case ARM::reloc_arm_pic_jt: - llvm_unreachable("unsupported ARM relocation type"); - - case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; - case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; - case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; - default: - llvm_unreachable("unknown ARM relocation type"); - } -} - -long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier) const { - llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not " - "implemented"); -} - -unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented"); -} - -bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - 
llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented"); -} - -unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " - "implemented"); -} - -long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset, - unsigned RelOffset, - unsigned RelTy) const { - llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " - "implemented"); -} diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h deleted file mode 100644 index 6a84f8a..0000000 --- a/lib/Target/ARM/ARMELFWriterInfo.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the ARM backend. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM_ELF_WRITER_INFO_H -#define ARM_ELF_WRITER_INFO_H - -#include "llvm/Target/TargetELFWriterInfo.h" - -namespace llvm { - class TargetMachine; - - class ARMELFWriterInfo : public TargetELFWriterInfo { - public: - ARMELFWriterInfo(TargetMachine &TM); - virtual ~ARMELFWriterInfo(); - - /// getRelocationType - Returns the target specific ELF Relocation type. - /// 'MachineRelTy' contains the object code independent relocation type - virtual unsigned getRelocationType(unsigned MachineRelTy) const; - - /// hasRelocationAddend - True if the target uses an addend in the - /// ELF relocation entry. - virtual bool hasRelocationAddend() const { return false; } - - /// getDefaultAddendForRelTy - Gets the default addend value for a - /// relocation entry based on the target ELF relocation type. - virtual long int getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier = 0) const; - - /// getRelTySize - Returns the size of relocatable field in bits - virtual unsigned getRelocationTySize(unsigned RelTy) const; - - /// isPCRelativeRel - True if the relocation type is pc relative - virtual bool isPCRelativeRel(unsigned RelTy) const; - - /// getJumpTableRelocationTy - Returns the machine relocation type used - /// to reference a jumptable. - virtual unsigned getAbsoluteLabelMachineRelTy() const; - - /// computeRelocation - Some relocatable fields could be relocated - /// directly, avoiding the relocation symbol emission, compute the - /// final relocation value for this symbol. - virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, - unsigned RelTy) const; - }; - -} // end llvm namespace - -#endif // ARM_ELF_WRITER_INFO_H diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 8ed6b75..beb843c 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -23,10 +23,10 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! 
+#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; static cl::opt<bool> @@ -103,9 +103,9 @@ namespace { bool IsLoad; bool isUpdating; bool hasWritebackOperand; - NEONRegSpacing RegSpacing; - unsigned char NumRegs; // D registers loaded or stored - unsigned char RegElts; // elements per D register; used for lane ops + uint8_t RegSpacing; // One of type NEONRegSpacing + uint8_t NumRegs; // D registers loaded or stored + uint8_t RegElts; // elements per D register; used for lane ops // FIXME: Temporary flag to denote whether the real instruction takes // a single register (like the encoding) or all of the registers in // the list (like the asm syntax and the isel DAG). When all definitions @@ -377,7 +377,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed"); - NEONRegSpacing RegSpc = TableEntry->RegSpacing; + NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; unsigned NumRegs = TableEntry->NumRegs; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), @@ -442,7 +442,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed"); - NEONRegSpacing RegSpc = TableEntry->RegSpacing; + NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; unsigned NumRegs = TableEntry->NumRegs; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), @@ -493,7 +493,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); assert(TableEntry && "NEONLdStTable lookup failed"); - NEONRegSpacing RegSpc = TableEntry->RegSpacing; + NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; unsigned NumRegs = TableEntry->NumRegs; unsigned RegElts = TableEntry->RegElts; @@ -777,9 +777,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::Int_eh_sjlj_dispatchsetup: - case ARM::Int_eh_sjlj_dispatchsetup_nofp: - case ARM::tInt_eh_sjlj_dispatchsetup: { + case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); @@ -1208,57 +1206,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VSETLNi8Q: - case ARM::VSETLNi16Q: { - // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting - // on the respective D register. - - unsigned QReg = MI.getOperand(1).getReg(); - unsigned QLane = MI.getOperand(3).getImm(); - - unsigned NewOpcode, DLane, DSubReg; - switch (Opcode) { - default: llvm_unreachable("Invalid opcode!"); - case ARM::VSETLNi8Q: - // 4 possible 8-bit lanes per DPR: - NewOpcode = ARM::VSETLNi8; - DLane = QLane % 8; - DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0; - break; - case ARM::VSETLNi16Q: - // 4 possible 16-bit lanes per DPR. - NewOpcode = ARM::VSETLNi16; - DLane = QLane % 4; - DSubReg = (QLane / 4) ? 
ARM::dsub_1 : ARM::dsub_0; - break; - } - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode)); - - unsigned DReg = TRI->getSubReg(QReg, DSubReg); - - MIB.addReg(DReg, RegState::Define); // Output DPR - MIB.addReg(DReg); // Input DPR - MIB.addOperand(MI.getOperand(2)); // Input GPR - MIB.addImm(DLane); // Lane - - // Add the predicate operands. - MIB.addOperand(MI.getOperand(4)); - MIB.addOperand(MI.getOperand(5)); - - if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register. - MIB->addRegisterKilled(QReg, TRI, true); - // And an implicit def of the output register (which should always be the - // same as the input register). - MIB->addRegisterDefined(QReg, TRI); - - TransferImpOps(MI, MIB, MIB); - - MI.eraseFromParent(); - return true; - } - case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 045d904..94c574a 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -16,31 +16,31 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" -#include "ARMTargetMachine.h" -#include "ARMSubtarget.h" #include "ARMConstantPoolValue.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -178,22 +178,24 @@ class ARMFastISel : public FastISel { bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, + bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); - void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); + void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3); bool ARMIsMemCpySmall(uint64_t Len); - bool 
ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len); - unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); - unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); - unsigned ARMMaterializeInt(const Constant *C, EVT VT); - unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); - unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); - unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); + bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, + unsigned Alignment); + unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); + unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT); + unsigned ARMMaterializeInt(const Constant *C, MVT VT); + unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT); + unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg); + unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg); unsigned ARMSelectCallOp(bool UseReg); + unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); // Call handling routines. private: @@ -219,7 +221,7 @@ class ARMFastISel : public FastISel { bool isARMNEONPred(const MachineInstr *MI); bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); - void AddLoadStoreOperands(EVT VT, Address &Addr, + void AddLoadStoreOperands(MVT VT, Address &Addr, const MachineInstrBuilder &MIB, unsigned Flags, bool useAM3); }; @@ -485,7 +487,7 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, // TODO: Don't worry about 64-bit now, but when this is fixed remove the // checks from the various callers. -unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { +unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) { if (VT == MVT::f64) return 0; unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); @@ -495,7 +497,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { return MoveReg; } -unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { +unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) { if (VT == MVT::i64) return 0; unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); @@ -508,7 +510,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { // For double width floating point we need to materialize two constants // (the high and the low) into integer registers then use a move to get // the combined constant into an FP reg. -unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { +unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { const APFloat Val = CFP->getValueAPF(); bool is64bit = VT == MVT::f64; @@ -552,7 +554,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { return DestReg; } -unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { +unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return false; @@ -562,7 +564,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { const ConstantInt *CI = cast<ConstantInt>(C); if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) { unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16; - unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + const TargetRegisterClass *RC = isThumb2 ? 
&ARM::rGPRRegClass : + &ARM::GPRRegClass; + unsigned ImmReg = createResultReg(RC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg) .addImm(CI->getZExtValue())); @@ -612,13 +616,16 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { return DestReg; } -unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { +unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. if (VT != MVT::i32) return 0; Reloc::Model RelocM = TM.getRelocationModel(); bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); - unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + const TargetRegisterClass *RC = isThumb2 ? + (const TargetRegisterClass*)&ARM::rGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; + unsigned DestReg = createResultReg(RC); // Use movw+movt when possible, it avoids constant pool entries. // Darwin targets don't support movt with Reloc::Static, see @@ -648,6 +655,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { Align = TD.getTypeAllocSize(GV->getType()); } + if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_) + return ARMLowerPICELF(GV, Align, VT); + // Grab index. unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); @@ -709,10 +719,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { } unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { - EVT VT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. - if (!VT.isSimple()) return 0; + if (!CEVT.isSimple()) return 0; + MVT VT = CEVT.getSimpleVT(); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return ARMMaterializeFP(CFP, VT); @@ -888,12 +899,9 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { return Addr.Base.Reg != 0; } -void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { - - assert(VT.isSimple() && "Non-simple types are invalid here!"); - +void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { bool needsLowering = false; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: llvm_unreachable("Unhandled load/store type!"); case MVT::i1: case MVT::i8: @@ -944,13 +952,12 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { } } -void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, +void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, const MachineInstrBuilder &MIB, unsigned Flags, bool useAM3) { // addrmode5 output depends on the selection dag addressing dividing the // offset by 4 that it then later multiplies. Do this here as well. - if (VT.getSimpleVT().SimpleTy == MVT::f32 || - VT.getSimpleVT().SimpleTy == MVT::f64) + if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64) Addr.Offset /= 4; // Frame base works a bit differently. Handle it separately. @@ -993,14 +1000,13 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, AddOptionalDefs(MIB); } -bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, +bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment, bool isZExt, bool allocReg) { - assert(VT.isSimple() && "Non-simple types are invalid here!"); unsigned Opc; bool useAM3 = false; bool needVMOV = false; const TargetRegisterClass *RC; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { // This is mostly going to be Neon/vector support. 
default: return false; case MVT::i1: @@ -1021,6 +1027,9 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, RC = &ARM::GPRRegClass; break; case MVT::i16: + if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem()) + return false; + if (isThumb2) { if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8; @@ -1033,6 +1042,9 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, RC = &ARM::GPRRegClass; break; case MVT::i32: + if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem()) + return false; + if (isThumb2) { if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) Opc = ARM::t2LDRi8; @@ -1111,11 +1123,11 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { return true; } -bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, +bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment) { unsigned StrOpc; bool useAM3 = false; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: { @@ -1139,6 +1151,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, } break; case MVT::i16: + if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem()) + return false; + if (isThumb2) { if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) StrOpc = ARM::t2STRHi8; @@ -1150,6 +1165,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, } break; case MVT::i32: + if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem()) + return false; + if (isThumb2) { if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) StrOpc = ARM::t2STRi8; @@ -1372,14 +1390,20 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) .addReg(AddrReg)); + + const IndirectBrInst *IB = cast<IndirectBrInst>(I); + for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]); + return true; } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); - EVT SrcVT = TLI.getValueType(Ty, true); - if (!SrcVT.isSimple()) return false; + EVT SrcEVT = TLI.getValueType(Ty, true); + if (!SrcEVT.isSimple()) return false; + MVT SrcVT = SrcEVT.getSimpleVT(); bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); if (isFloat && !Subtarget->hasVFP2()) @@ -1416,7 +1440,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, unsigned CmpOpc; bool isICmp = true; bool needsExt = false; - switch (SrcVT.getSimpleVT().SimpleTy) { + switch (SrcVT.SimpleTy) { default: return false; // TODO: Verify compares. case MVT::f32: @@ -1568,7 +1592,10 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(Src->getType(), true); + if (!SrcEVT.isSimple()) + return false; + MVT SrcVT = SrcEVT.getSimpleVT(); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; @@ -1577,8 +1604,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { // Handle sign-extension. 
if (SrcVT == MVT::i16 || SrcVT == MVT::i8) { - EVT DestVT = MVT::i32; - SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, + SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32, /*isZExt*/!isSigned); if (SrcReg == 0) return false; } @@ -1641,7 +1667,6 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { // Things need to be register sized for register moves. if (VT != MVT::i32) return false; - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned CondReg = getRegForValue(I->getOperand(0)); if (CondReg == 0) return false; @@ -1674,14 +1699,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { .addReg(CondReg).addImm(0)); unsigned MovCCOpc; + const TargetRegisterClass *RC; if (!UseImm) { + RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass; MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr; } else { - if (!isNegativeImm) { + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; + if (!isNegativeImm) MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; - } else { + else MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi; - } } unsigned ResultReg = createResultReg(RC); if (!UseImm) @@ -1783,7 +1810,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { } bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { - EVT VT = TLI.getValueType(I->getType(), true); + EVT FPVT = TLI.getValueType(I->getType(), true); + if (!FPVT.isSimple()) return false; + MVT VT = FPVT.getSimpleVT(); // We can get here in the case when we want to use NEON for our fp // operations, but can't figure out how to. Just use the vfp instructions @@ -1814,7 +1843,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { unsigned Op2 = getRegForValue(I->getOperand(1)); if (Op2 == 0) return false; - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(Op1).addReg(Op2)); @@ -2027,7 +2056,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, if (RVLocs.size() == 2 && RetVT == MVT::f64) { // For this move we copy into two registers and then move into the // double fp reg we want. - EVT DestVT = RVLocs[0].getValVT(); + MVT DestVT = RVLocs[0].getValVT(); const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); unsigned ResultReg = createResultReg(DstRC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -2042,7 +2071,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, UpdateValueMap(I, ResultReg); } else { assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); - EVT CopyVT = RVLocs[0].getValVT(); + MVT CopyVT = RVLocs[0].getValVT(); // Special handling for extended integers. if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) @@ -2073,8 +2102,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ValLocs; @@ -2101,8 +2129,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - EVT RVVT = TLI.getValueType(RV->getType()); - EVT DestVT = VA.getValVT(); + EVT RVEVT = TLI.getValueType(RV->getType()); + if (!RVEVT.isSimple()) return false; + MVT RVVT = RVEVT.getSimpleVT(); + MVT DestVT = VA.getValVT(); // Special handling for extended integers. if (RVVT != DestVT) { if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) @@ -2147,7 +2177,9 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { unsigned ARMFastISel::getLibcallReg(const Twine &Name) { GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, GlobalValue::ExternalLinkage, 0, Name); - return ARMMaterializeGV(GV, TLI.getValueType(GV->getType())); + EVT LCREVT = TLI.getValueType(GV->getType()); + if (!LCREVT.isSimple()) return 0; + return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); } // A quick function that will emit a call for a named libcall in F with the @@ -2256,6 +2288,9 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Can't handle inline asm. if (isa<InlineAsm>(Callee)) return false; + // Allow SelectionDAG isel to handle tail calls. + if (CI->isTailCall()) return false; + // Check the calling convention. ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); @@ -2395,21 +2430,30 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { } bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, - uint64_t Len) { + uint64_t Len, unsigned Alignment) { // Make sure we don't bloat code by inlining very large memcpy's. if (!ARMIsMemCpySmall(Len)) return false; - // We don't care about alignment here since we just emit integer accesses. while (Len) { MVT VT; - if (Len >= 4) - VT = MVT::i32; - else if (Len >= 2) - VT = MVT::i16; - else { - assert(Len == 1); - VT = MVT::i8; + if (!Alignment || Alignment >= 4) { + if (Len >= 4) + VT = MVT::i32; + else if (Len >= 2) + VT = MVT::i16; + else { + assert (Len == 1 && "Expected a length of 1!"); + VT = MVT::i8; + } + } else { + // Bound based on alignment. + if (Len >= 2 && Alignment == 2) + VT = MVT::i16; + else { + assert (Alignment == 1 && "Expected an alignment of 1!"); + VT = MVT::i8; + } } bool RV; @@ -2488,7 +2532,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { if (!ARMComputeAddress(MTI.getRawDest(), Dest) || !ARMComputeAddress(MTI.getRawSource(), Src)) return false; - if (ARMTryEmitSmallMemCpy(Dest, Src, Len)) + unsigned Alignment = MTI.getAlignment(); + if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment)) return true; } } @@ -2546,18 +2591,19 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { return true; } -unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, +unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) return 0; unsigned Opc; bool isBoolZext = false; - if (!SrcVT.isSimple()) return 0; - switch (SrcVT.getSimpleVT().SimpleTy) { + const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + switch (SrcVT.SimpleTy) { default: return 0; case MVT::i16: if (!Subtarget->hasV6Ops()) return 0; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; if (isZExt) Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH; else @@ -2565,6 +2611,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, break; case MVT::i8: if (!Subtarget->hasV6Ops()) return 0; + RC = isThumb2 ? 
&ARM::rGPRRegClass : &ARM::GPRnopcRegClass; if (isZExt) Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB; else @@ -2572,6 +2619,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, break; case MVT::i1: if (isZExt) { + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; isBoolZext = true; break; @@ -2579,7 +2627,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, return 0; } - unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + unsigned ResultReg = createResultReg(RC); MachineInstrBuilder MIB; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(SrcReg); @@ -2598,14 +2646,18 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); - EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(SrcTy, true); - DestVT = TLI.getValueType(DestTy, true); - bool isZExt = isa<ZExtInst>(I); unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; + EVT SrcEVT, DestEVT; + SrcEVT = TLI.getValueType(SrcTy, true); + DestEVT = TLI.getValueType(DestTy, true); + if (!SrcEVT.isSimple()) return false; + if (!DestEVT.isSimple()) return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DestVT = DestEVT.getSimpleVT(); unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt); if (ResultReg == 0) return false; UpdateValueMap(I, ResultReg); @@ -2784,6 +2836,47 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, return true; } +unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, + unsigned Align, MVT VT) { + bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); + ARMConstantPoolConstant *CPV = + ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); + unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); + + unsigned Opc; + unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT)); + // Load value. + if (isThumb2) { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::t2LDRpci), DestReg1) + .addConstantPoolIndex(Idx)); + Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs; + } else { + // The extra immediate is for addrmode2. + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(ARM::LDRcp), DestReg1) + .addConstantPoolIndex(Idx).addImm(0)); + Opc = UseGOTOFF ? 
ARM::ADDrr : ARM::LDRrs; + } + + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + if (GlobalBaseReg == 0) { + GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT)); + AFI->setGlobalBaseReg(GlobalBaseReg); + } + + unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT)); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), DestReg2) + .addReg(DestReg1) + .addReg(GlobalBaseReg); + if (!UseGOTOFF) + MIB.addImm(0); + AddOptionalDefs(MIB); + + return DestReg2; +} + namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index aee72d2..39d27c4 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -15,17 +15,16 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "llvm/CallingConv.h" -#include "llvm/Function.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -153,7 +152,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; int D8SpillFI = 0; - // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; @@ -360,7 +360,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); - // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; @@ -694,7 +695,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); if (DeleteRet) { - MIB->copyImplicitOps(&*MI); + MIB.copyImplicitOps(&*MI); MI->eraseFromParent(); } MI = MIB; @@ -1151,7 +1152,8 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Naked functions don't spill callee-saved registers. - if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) return; // We are planning to use NEON instructions vst1 / vld1. @@ -1176,7 +1178,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned NumSpills = 0; for (; NumSpills < 8; ++NumSpills) - if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills)) + if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) break; // Don't do this for just one d-register. It's not worth it. 
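The ARMLowerPICELF hunk above emits a fixed two-instruction shape: first load the constant-pool offset, then either add the global base register (GOTOFF, for locals and hidden-visibility globals) or load through the GOT slot (everything else). A minimal standalone C++ sketch of that decision follows; the string opcode names mirror the BuildMI calls in the hunk, and PICSequence/lowerPICGlobal are illustrative stand-ins, not part of the patch.

#include <cstdio>

enum class Linkage { Local, Hidden, Default };

struct PICSequence {
  const char *firstInsn;   // load the constant-pool offset into DestReg1
  const char *secondInsn;  // combine DestReg1 with the global base register
};

// Mirrors: UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility().
PICSequence lowerPICGlobal(Linkage L, bool isThumb2) {
  bool UseGOTOFF = (L == Linkage::Local || L == Linkage::Hidden);
  PICSequence Seq;
  Seq.firstInsn = isThumb2 ? "t2LDRpci" : "LDRcp";    // DestReg1 = cp[Idx]
  if (UseGOTOFF)
    Seq.secondInsn = isThumb2 ? "t2ADDrr" : "t2LDRs"; // see below
  else
    Seq.secondInsn = isThumb2 ? "t2LDRs" : "LDRrs";
  // GOTOFF: addr = base + offset (an add); GOT: addr = *(base + offset)
  // (an indexed load), which is why the non-GOTOFF path appends the extra
  // immediate 0 operand in the hunk.
  if (UseGOTOFF)
    Seq.secondInsn = isThumb2 ? "t2ADDrr" : "ADDrr";
  return Seq;
}

int main() {
  PICSequence S = lowerPICGlobal(Linkage::Default, /*isThumb2=*/true);
  std::printf("%s ; %s\n", S.firstInsn, S.secondInsn);
}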
@@ -1209,6 +1211,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // Spill R4 if Thumb2 function requires stack realignment - it will be used as @@ -1218,12 +1221,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. if (AFI->isThumb2Function() && (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) - MF.getRegInfo().setPhysRegUsed(ARM::R4); + MRI.setPhysRegUsed(ARM::R4); if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. if (AFI->getVarArgsRegSaveSize() > 0) - MF.getRegInfo().setPhysRegUsed(ARM::LR); + MRI.setPhysRegUsed(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know // for sure what the stack size will be, but for this, an estimate is good @@ -1233,7 +1236,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. unsigned StackSize = estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) - MF.getRegInfo().setPhysRegUsed(ARM::R4); + MRI.setPhysRegUsed(ARM::R4); } // See if we can spill vector registers to aligned stack. @@ -1241,7 +1244,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); + MRI.setPhysRegUsed(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. @@ -1249,7 +1252,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { + if (MRI.isPhysRegUsed(Reg)) { Spilled = true; CanEliminateFrame = false; } @@ -1338,7 +1341,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 
if (!LRSpilled && CS1Spilled) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); + MRI.setPhysRegUsed(ARM::LR); NumGPRSpills++; UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); @@ -1347,7 +1350,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (hasFP(MF)) { - MF.getRegInfo().setPhysRegUsed(FramePtr); + MRI.setPhysRegUsed(FramePtr); NumGPRSpills++; } @@ -1362,16 +1365,16 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill high register if the function is thumb1 if (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || Reg == ARM::LR) { - MF.getRegInfo().setPhysRegUsed(Reg); - if (!RegInfo->isReservedReg(MF, Reg)) + MRI.setPhysRegUsed(Reg); + if (!MRI.isReserved(Reg)) ExtraCSSpill = true; break; } } } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); - MF.getRegInfo().setPhysRegUsed(Reg); - if (!RegInfo->isReservedReg(MF, Reg)) + MRI.setPhysRegUsed(Reg); + if (!MRI.isReserved(Reg)) ExtraCSSpill = true; } } @@ -1389,7 +1392,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, while (NumExtras && !UnspilledCS1GPRs.empty()) { unsigned Reg = UnspilledCS1GPRs.back(); UnspilledCS1GPRs.pop_back(); - if (!RegInfo->isReservedReg(MF, Reg) && + if (!MRI.isReserved(Reg) && (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || Reg == ARM::LR)) { Extras.push_back(Reg); @@ -1401,7 +1404,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, while (NumExtras && !UnspilledCS2GPRs.empty()) { unsigned Reg = UnspilledCS2GPRs.back(); UnspilledCS2GPRs.pop_back(); - if (!RegInfo->isReservedReg(MF, Reg)) { + if (!MRI.isReserved(Reg)) { Extras.push_back(Reg); NumExtras--; } @@ -1409,7 +1412,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (Extras.size() && NumExtras == 0) { for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MF.getRegInfo().setPhysRegUsed(Extras[i]); + MRI.setPhysRegUsed(Extras[i]); } } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot @@ -1423,7 +1426,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (ForceLRSpill) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); + MRI.setPhysRegUsed(ARM::LR); AFI->setLRIsSpilledForFarJump(true); } } diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index a5fd15b..1240169 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -47,7 +47,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && // On A9, AGU and NEON/FPU are muxed. 
- !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) && + !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 1406620..939bed7 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -16,24 +16,24 @@ #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -78,6 +78,8 @@ public: return "ARM Instruction Selection"; } + virtual void PreprocessISelDAG(); + /// getI32Imm - Return a target constant of type i32 with the specified /// value. inline SDValue getI32Imm(unsigned Imm) { @@ -265,15 +267,16 @@ private: char ConstraintCode, std::vector<SDValue> &OutOps); - // Form pairs of consecutive S, D, or Q registers. - SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1); - SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); - SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1); + // Form pairs of consecutive R, S, D, or Q registers. + SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); + SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); + SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); + SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); // Form sequences of 4 consecutive S, D, or Q registers. - SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); // Get the alignment operand for a NEON VLD or VST instruction. SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector); @@ -305,7 +308,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { } /// \brief Check whether a particular node is a constant value representable as -/// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax). +/// (N * Scale) where (N in [\p RangeMin, \p RangeMax). /// /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 
static bool isScaledConstantInRange(SDValue Node, int Scale, @@ -326,6 +329,87 @@ static bool isScaledConstantInRange(SDValue Node, int Scale, return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; } +void ARMDAGToDAGISel::PreprocessISelDAG() { + if (!Subtarget->hasV6T2Ops()) + return; + + bool isThumb2 = Subtarget->isThumb(); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (N->getOpcode() != ISD::ADD) + continue; + + // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with + // leading zeros, followed by consecutive set bits, followed by 1 or 2 + // trailing zeros, e.g. 1020. + // Transform the expression to + // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number + // of trailing zeros of c2. The left shift would be folded as an shifter + // operand of 'add' and the 'and' and 'srl' would become a bits extraction + // node (UBFX). + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + unsigned And_imm = 0; + if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { + if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) + std::swap(N0, N1); + } + if (!And_imm) + continue; + + // Check if the AND mask is an immediate of the form: 000.....1111111100 + unsigned TZ = CountTrailingZeros_32(And_imm); + if (TZ != 1 && TZ != 2) + // Be conservative here. Shifter operands aren't always free. e.g. On + // Swift, left shifter operand of 1 / 2 for free but others are not. + // e.g. + // ubfx r3, r1, #16, #8 + // ldr.w r3, [r0, r3, lsl #2] + // vs. + // mov.w r9, #1020 + // and.w r2, r9, r1, lsr #14 + // ldr r2, [r0, r2] + continue; + And_imm >>= TZ; + if (And_imm & (And_imm + 1)) + continue; + + // Look for (and (srl X, c1), c2). + SDValue Srl = N1.getOperand(0); + unsigned Srl_imm = 0; + if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || + (Srl_imm <= 2)) + continue; + + // Make sure first operand is not a shifter operand which would prevent + // folding of the left shift. + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (isThumb2) { + if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1)) + continue; + } else { + if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || + SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) + continue; + } + + // Now make the transformation. + Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32, + Srl.getOperand(0), + CurDAG->getConstant(Srl_imm+TZ, MVT::i32)); + N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32, + Srl, CurDAG->getConstant(And_imm, MVT::i32)); + N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32, + N1, CurDAG->getConstant(TZ, MVT::i32)); + CurDAG->UpdateNodeOperands(N, N0, N1); + } +} + /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at /// least on current ARM implementations) which should be avoidded. 
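The PreprocessISelDAG hunk above only fires when the AND immediate has the shape 000...0111...100: one or two trailing zeros (the shift amounts that are free as shifter operands, per the Swift note in the comment) followed by a solid run of ones, so the and+srl can become a UBFX and the peeled shift folds into the add. A small self-contained check mirroring just that bit test, assuming GCC/Clang's __builtin_ctz as a stand-in for CountTrailingZeros_32; this sketches the mask analysis only, not the SelectionDAG rewrite:

#include <cassert>
#include <cstdint>

bool isShiftableExtractMask(uint32_t AndImm) {
  if (AndImm == 0)
    return false;
  unsigned TZ = __builtin_ctz(AndImm);  // CountTrailingZeros_32(And_imm)
  if (TZ != 1 && TZ != 2)               // only lsl #1 / lsl #2 are cheap
    return false;
  uint32_t M = AndImm >> TZ;
  return (M & (M + 1)) == 0;            // remaining low bits are all ones
}

int main() {
  assert(isShiftableExtractMask(1020));   // 0b1111111100: tz=2, then ones
  assert(!isShiftableExtractMask(1021));  // no trailing zeros
  assert(!isShiftableExtractMask(0x0F0C)); // ones are not contiguous to bit tz
  return 0;
}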
@@ -336,7 +420,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (!CheckVMLxHazard) return true; - if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9()) + if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() && + !Subtarget->isSwift()) return true; if (!N->hasOneUse()) @@ -374,12 +459,13 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt) { - if (!Subtarget->isCortexA9()) + if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) return true; if (Shift.hasOneUse()) return true; // R << 2 is free. - return ShOpcVal == ARM_AM::lsl && ShAmt == 2; + return ShOpcVal == ARM_AM::lsl && + (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); } bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, @@ -486,7 +572,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::MUL && - (!Subtarget->isCortexA9() || N.hasOneUse())) { + ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -550,7 +636,8 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || Subtarget->isSwift() || + N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't @@ -584,7 +671,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::MUL && - (!Subtarget->isCortexA9() || N.hasOneUse())) { + (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -650,7 +737,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, } } - if (Subtarget->isCortexA9() && !N.hasOneUse()) { + if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) { // Compute R +/- (R << N) and reuse it. Base = N; Offset = CurDAG->getRegister(0, MVT::i32); @@ -688,7 +775,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || Subtarget->isSwift() || + N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't @@ -1440,9 +1528,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } -/// PairSRegs - Form a D register from a pair of S registers. -/// -SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { +/// \brief Form a GPRPair pseudo register from a pair of GPR regs. 
+SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue RegClass = + CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); + const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); +} + +/// \brief Form a D register from a pair of S registers. +SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32); @@ -1452,9 +1550,8 @@ SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } -/// PairDRegs - Form a quad register from a pair of D registers. -/// -SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { +/// \brief Form a quad register from a pair of D registers. +SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); @@ -1463,9 +1560,8 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } -/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. -/// -SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { +/// \brief Form 4 consecutive D registers from a pair of Q registers. +SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); @@ -1474,9 +1570,8 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } -/// QuadSRegs - Form 4 consecutive S registers. -/// -SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, +/// \brief Form 4 consecutive S registers. +SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = @@ -1490,9 +1585,8 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); } -/// QuadDRegs - Form 4 consecutive D registers. -/// -SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, +/// \brief Form 4 consecutive D registers. +SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); @@ -1505,9 +1599,8 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); } -/// QuadQRegs - Form 4 consecutive Q registers. -/// -SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1, +/// \brief Form 4 consecutive Q registers. 
+SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32); @@ -1780,7 +1873,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue V0 = N->getOperand(Vec0Idx + 0); SDValue V1 = N->getOperand(Vec0Idx + 1); if (NumVecs == 2) - SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); else { SDValue V2 = N->getOperand(Vec0Idx + 2); // If it's a vst3, form a quad D-register and leave the last part as @@ -1788,13 +1881,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) : N->getOperand(Vec0Idx + 3); - SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); } } else { // Form a QQ register. SDValue Q0 = N->getOperand(Vec0Idx); SDValue Q1 = N->getOperand(Vec0Idx + 1); - SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); + SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); } unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : @@ -1836,7 +1929,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) : N->getOperand(Vec0Idx + 3); - SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); // Store the even D registers. This is always an updating store, so that it // provides the address to the second store for the odd subregs. @@ -1946,18 +2039,18 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, SDValue V1 = N->getOperand(Vec0Idx + 1); if (NumVecs == 2) { if (is64BitVector) - SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); else - SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); + SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); } else { SDValue V2 = N->getOperand(Vec0Idx + 2); SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) : N->getOperand(Vec0Idx + 3); if (is64BitVector) - SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); else - SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); } Ops.push_back(SuperReg); Ops.push_back(getI32Imm(Lane)); @@ -2083,7 +2176,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, SDValue V0 = N->getOperand(FirstTblReg + 0); SDValue V1 = N->getOperand(FirstTblReg + 1); if (NumVecs == 2) - RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); else { SDValue V2 = N->getOperand(FirstTblReg + 2); // If it's a vtbl3, form a quad D-register and leave the last part as @@ -2091,7 +2184,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, SDValue V3 = (NumVecs == 3) ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) : N->getOperand(FirstTblReg + 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); } SmallVector<SDValue, 6> Ops; @@ -2109,10 +2202,10 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, if (!Subtarget->hasV6T2Ops()) return NULL; - unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) + unsigned Opc = isSigned + ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); - // For unsigned extracts, check for a shift right and mask unsigned And_imm = 0; if (N->getOpcode() == ISD::AND) { @@ -2130,7 +2223,29 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, // Note: The width operand is encoded as width-1. unsigned Width = CountTrailingOnes_32(And_imm) - 1; unsigned LSB = Srl_imm; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + + if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { + // It's cheaper to use a right shift to extract the top bits. + if (Subtarget->isThumb()) { + Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + + // ARM models shift instructions as MOVsi with shifter operand. + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); + SDValue ShOpc = + CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), + MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, + getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5); + } + SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), @@ -2407,7 +2522,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { EVT VT = N->getValueType(0); if (!VT.is128BitVector() || N->getNumOperands() != 2) llvm_unreachable("unexpected CONCAT_VECTORS"); - return PairDRegs(VT, N->getOperand(0), N->getOperand(1)); + return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { @@ -2786,13 +2901,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned NumElts = VecVT.getVectorNumElements(); if (EltVT == MVT::f64) { assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); - return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1)); + return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); } assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); if (NumElts == 2) - return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1)); + return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); - return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1), + return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); } @@ -3005,17 +3120,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue Chain = N->getOperand(0); - unsigned NewOpc = ARM::LDREXD; - if (Subtarget->isThumb() && Subtarget->hasThumb2()) - NewOpc = ARM::t2LDREXD; + bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); + unsigned NewOpc = isThumb ? 
ARM::t2LDREXD :ARM::LDREXD; // arm_ldrexd returns a i64 value in {i32, i32} std::vector<EVT> ResTys; - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); + if (isThumb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + } else + ResTys.push_back(MVT::Untyped); ResTys.push_back(MVT::Other); - // place arguments in the right order + // Place arguments in the right order. SmallVector<SDValue, 7> Ops; Ops.push_back(MemAddr); Ops.push_back(getAL(CurDAG)); @@ -3028,30 +3145,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); - // Until there's support for specifing explicit register constraints - // like the use of even/odd register pair, hardcode ldrexd to always - // use the pair [R0, R1] to hold the load result. - Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0, - SDValue(Ld, 0), SDValue(0,0)); - Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1, - SDValue(Ld, 1), Chain.getValue(1)); - // Remap uses. - SDValue Glue = Chain.getValue(1); + SDValue Glue = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); if (!SDValue(N, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R0, MVT::i32, Glue); - Glue = Result.getValue(2); + SDValue Result; + if (isThumb) + Result = SDValue(Ld, 0); + else { + SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); + SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue); + Result = SDValue(ResNode,0); + Glue = Result.getValue(1); + } ReplaceUses(SDValue(N, 0), Result); } if (!SDValue(N, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R1, MVT::i32, Glue); - Glue = Result.getValue(2); + SDValue Result; + if (isThumb) + Result = SDValue(Ld, 1); + else { + SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); + SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue); + Result = SDValue(ResNode,0); + Glue = Result.getValue(1); + } ReplaceUses(SDValue(N, 1), Result); } - - ReplaceUses(SDValue(N, 2), SDValue(Ld, 2)); + ReplaceUses(SDValue(N, 2), Glue); return NULL; } @@ -3062,38 +3184,27 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Val1 = N->getOperand(3); SDValue MemAddr = N->getOperand(4); - // Until there's support for specifing explicit register constraints - // like the use of even/odd register pair, hardcode strexd to always - // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored. - Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0, - SDValue(0, 0)); - Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1)); - - SDValue Glue = Chain.getValue(1); - Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R2, MVT::i32, Glue); - Glue = Val0.getValue(1); - Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R3, MVT::i32, Glue); - // Store exclusive double return a i32 value which is the return status // of the issued store. std::vector<EVT> ResTys; ResTys.push_back(MVT::i32); ResTys.push_back(MVT::Other); - // place arguments in the right order + bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); + // Place arguments in the right order. 
SmallVector<SDValue, 7> Ops; - Ops.push_back(Val0); - Ops.push_back(Val1); + if (isThumb) { + Ops.push_back(Val0); + Ops.push_back(Val1); + } else + // arm_strexd uses GPRPair. + Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); Ops.push_back(MemAddr); Ops.push_back(getAL(CurDAG)); Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(Chain); - unsigned NewOpc = ARM::STREXD; - if (Subtarget->isThumb() && Subtarget->hasThumb2()) - NewOpc = ARM::t2STREXD; + unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD; SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), Ops.size()); @@ -3291,7 +3402,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { // Form a REG_SEQUENCE to force register allocation. SDValue V0 = N->getOperand(0); SDValue V1 = N->getOperand(1); - SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); SmallVector<SDValue, 6> Ops; Ops.push_back(RegSeq); @@ -3321,6 +3432,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectAtomic64(N, ARM::ATOMSWAP6432); case ARMISD::ATOMCMPXCHG64_DAG: return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); + + case ARMISD::ATOMMIN64_DAG: + return SelectAtomic64(N, ARM::ATOMMIN6432); + case ARMISD::ATOMUMIN64_DAG: + return SelectAtomic64(N, ARM::ATOMUMIN6432); + case ARMISD::ATOMMAX64_DAG: + return SelectAtomic64(N, ARM::ATOMMAX6432); + case ARMISD::ATOMUMAX64_DAG: + return SelectAtomic64(N, ARM::ATOMUMAX6432); } return SelectCode(N); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e51315e..5b3e31f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -23,14 +23,8 @@ #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/Instruction.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Type.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -40,14 +34,20 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -122,6 +122,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT, Custom); @@ -514,6 +515,10 @@ 
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); setOperationAction(ISD::FEXP, MVT::v4f32, Expand); setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); + setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); + setOperationAction(ISD::FRINT, MVT::v4f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. @@ -538,6 +543,17 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); + + // NEON does not have single instruction CTPOP for vectors with element + // types wider than 8-bits. However, custom lowering can leverage the + // v8i8/v16i8 vcnt instruction. + setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); + setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); @@ -635,9 +651,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - // These are expanded into libcalls. - if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) { - // v7M has a hardware divider + if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && + !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { + // These are expanded into libcalls if the cpu doesn't have HW divider. setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); } @@ -687,7 +703,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); @@ -813,9 +833,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setSchedulingPreference(Sched::Hybrid); //// temporary - rewrite interface to use type - maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1; - maxStoresPerMemset = 16; + maxStoresPerMemset = 8; maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores + maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores + maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. 
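The SDIV/UDIV hunk above now keys the libcall expansion on both hardware-divide features instead of Thumb2 alone. A hedged sketch of the resulting predicate (SubtargetModel and expandDivToLibcall are illustrative names, not LLVM API):

    // Integer divide stays legal only when the mode being compiled for has a
    // hardware divider; otherwise ISD::SDIV/UDIV are expanded to libcalls.
    struct SubtargetModel {
      bool hasDivide;          // FeatureHWDiv: Thumb2-mode SDIV/UDIV
      bool hasDivideInARMMode; // FeatureHWDivARM: ARM-mode SDIV/UDIV
      bool isThumb;            // generating any Thumb code
      bool isThumb2;           // generating Thumb2 code
    };

    static bool expandDivToLibcall(const SubtargetModel &ST) {
      return !(ST.hasDivide && ST.isThumb2) &&
             !(ST.hasDivideInARMMode && !ST.isThumb);
    }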
@@ -824,7 +847,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) benefitFromCodePlacementOpt = true; // Prefer likely predicted branches to selects on out-of-order cores. - predictableSelectIsExpensive = Subtarget->isCortexA9(); + predictableSelectIsExpensive = Subtarget->isLikeA9(); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } @@ -840,10 +863,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // due to the common occurrence of cross class copies and subregister insertions // and extractions. std::pair<const TargetRegisterClass*, uint8_t> -ARMTargetLowering::findRepresentativeClass(EVT VT) const{ +ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const TargetRegisterClass *RRC = 0; uint8_t Cost = 1; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(VT); // Use DPR as representative register class for all floating point @@ -1023,7 +1046,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { /// getRegClassFor - Return the register class that should be used for the /// specified value type. -const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { +const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. @@ -1593,19 +1616,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; + bool HasMinSizeAttr = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - else if (doesNotRet && isDirect && !isARMFunc && - Subtarget->hasRAS() && !Subtarget->isThumb1Only()) - // "mov lr, pc; b _foo" to avoid confusing the RSP - CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; } else { - if (!isDirect && !Subtarget->hasV5TOps()) { + if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - } else if (doesNotRet && isDirect && Subtarget->hasRAS()) + else if (doesNotRet && isDirect && Subtarget->hasRAS() && + // Emit regular call when code size is the priority + !HasMinSizeAttr) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else @@ -1655,22 +1678,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, /// and then confiscate the rest of the parameter registers to insure /// this. void -ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { +ARMTargetLowering::HandleByVal( + CCState *State, unsigned &size, unsigned Align) const { unsigned reg = State->AllocateReg(GPRArgRegs, 4); assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); if ((!State->isFirstByValRegValid()) && (ARM::R0 <= reg) && (reg <= ARM::R3)) { - State->setFirstByValReg(reg); - // At a call site, a byval parameter that is split between - // registers and memory needs its size truncated here. In a - // function prologue, such byval parameters are reassembled in - // memory, and are not truncated. 
- if (State->getCallOrPrologue() == Call) { - unsigned excess = 4 * (ARM::R4 - reg); - assert(size >= excess && "expected larger existing stack allocation"); - size -= excess; + if (Subtarget->isAAPCS_ABI() && Align > 4) { + unsigned AlignInRegs = Align / 4; + unsigned Waste = (ARM::R4 - reg) % AlignInRegs; + for (unsigned i = 0; i < Waste; ++i) + reg = State->AllocateReg(GPRArgRegs, 4); + } + if (reg != 0) { + State->setFirstByValReg(reg); + // At a call site, a byval parameter that is split between + // registers and memory needs its size truncated here. In a + // function prologue, such byval parameters are reassembled in + // memory, and are not truncated. + if (State->getCallOrPrologue() == Call) { + unsigned excess = 4 * (ARM::R4 - reg); + assert(size >= excess && "expected larger existing stack allocation"); + size -= excess; + } } } // Confiscate any remaining parameter registers to preclude their @@ -1803,6 +1835,14 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } + // If Caller's vararg or byval argument has been split between registers and + // stack, do not perform tail call, since part of the argument is in caller's + // local frame. + const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). + getInfo<ARMFunctionInfo>(); + if (AFI_Caller->getVarArgsRegSaveSize()) + return false; + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { @@ -1857,6 +1897,17 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return true; } +bool +ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); + return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, + isVarArg)); +} + SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -2534,7 +2585,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, - unsigned ArgOffset) const { + const Value *OrigArg, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -2561,7 +2615,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, getPointerTy()); SmallVector<SDValue, 4> MemOps; - for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { + for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) { const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = &ARM::tGPRRegClass; @@ -2572,7 +2626,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), + MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i), false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, @@ -2583,7 +2637,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. 
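The HandleByVal change above burns argument registers until the first register of an over-aligned byval is itself aligned. A small model of that arithmetic, assuming AAPCS with r0-r3 numbered 0-3 and 4 standing in for ARM::R4 (alignByValFirstReg is a hypothetical name):

    #include <cassert>

    // For a byval with alignment > 4, skip (ARM::R4 - reg) % (Align / 4)
    // registers so that e.g. an 8-byte-aligned byval starts on an even register.
    static unsigned alignByValFirstReg(unsigned reg, unsigned alignBytes) {
      const unsigned alignInRegs = alignBytes / 4; // 2 for 8-byte alignment
      const unsigned waste = (4 - reg) % alignInRegs;
      return reg + waste;
    }

    int main() {
      assert(alignByValFirstReg(1, 8) == 2); // r1 is wasted; the pair is r2/r3
      assert(alignByValFirstReg(0, 8) == 0); // r0 is already pair-aligned
      return 0;
    }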
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); + AFI->setVarArgsFrameIndex( + MFI->CreateFixedObject(4, ArgOffset, !ForceMutable)); } SDValue @@ -2606,14 +2661,16 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); - + SmallVector<SDValue, 16> ArgValues; int lastInsIndex = -1; - SDValue ArgValue; + Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); + unsigned CurArgIdx = 0; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - + std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; // Arguments stored in registers. if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); @@ -2707,14 +2764,20 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. if (Flags.isByVal()) { - unsigned VARegSize, VARegSaveSize; - computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); - VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); - unsigned Bytes = Flags.getByValSize() - VARegSize; - if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. - int FI = MFI->CreateFixedObject(Bytes, - VA.getLocMemOffset(), false); - InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (!AFI->getVarArgsFrameIndex()) { + VarArgStyleRegisters(CCInfo, DAG, + dl, Chain, CurOrigArg, + Ins[VA.getValNo()].PartOffset, + VA.getLocMemOffset(), + true /*force mutable frames*/); + int VAFrameIndex = AFI->getVarArgsFrameIndex(); + InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy())); + } else { + int FI = MFI->CreateFixedObject(Flags.getByValSize(), + VA.getLocMemOffset(), false); + InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + } } else { int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, VA.getLocMemOffset(), true); @@ -2732,7 +2795,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // varargs if (isVarArg) - VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0, + CCInfo.getNextStackOffset()); return Chain; } @@ -3501,6 +3565,114 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::CTLZ, dl, VT, rbit); } +/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count +/// for each 16-bit element from operand, repeated. The basic idea is to +/// leverage vcnt to get the 8-bit counts, gather and add the results. +/// +/// Trace for v4i16: +/// input = [v0 v1 v2 v3 ] (vi 16-bit element) +/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) +/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) +/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] +/// [b0 b1 b2 b3 b4 b5 b6 b7] +/// +[b1 b0 b3 b2 b5 b4 b7 b6] +/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, +/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) +static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + EVT VT8Bit = VT.is64BitVector() ? 
MVT::v8i8 : MVT::v16i8; + SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); + SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); + SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); + SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); + return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); +} + +/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the +/// bit-count for each 16-bit element from the operand. We need slightly +/// different sequencing for v4i16 and v8i16 to stay within NEON's available +/// 64/128-bit registers. +/// +/// Trace for v4i16: +/// input = [v0 v1 v2 v3 ] (vi 16-bit element) +/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) +/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] +/// v4i16:Extracted = [k0 k1 k2 k3 ] +static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + SDValue BitCounts = getCTPOP16BitCounts(N, DAG); + if (VT.is64BitVector()) { + SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, + DAG.getIntPtrConstant(0)); + } else { + SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, + BitCounts, DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); + } +} + +/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the +/// bit-count for each 32-bit element from the operand. The idea here is +/// to split the vector into 16-bit elements, leverage the 16-bit count +/// routine, and then combine the results. +/// +/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): +/// input = [v0 v1 ] (vi: 32-bit elements) +/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) +/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) +/// vrev: N0 = [k1 k0 k3 k2 ] +/// [k0 k1 k2 k3 ] +/// N1 =+[k1 k0 k3 k2 ] +/// [k0 k2 k1 k3 ] +/// N2 =+[k1 k3 k0 k2 ] +/// [k0 k2 k1 k3 ] +/// Extended =+[k1 k3 k0 k2 ] +/// [k0 k2 ] +/// Extracted=+[k1 k3 ] +/// +static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + EVT VT16Bit = VT.is64BitVector() ? 
MVT::v4i16 : MVT::v8i16; + + SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); + SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); + SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); + SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); + SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); + + if (VT.is64BitVector()) { + SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, + DAG.getIntPtrConstant(0)); + } else { + SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, + DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); + } +} + +static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VT = N->getValueType(0); + + assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); + assert((VT == MVT::v2i32 || VT == MVT::v4i32 || + VT == MVT::v4i16 || VT == MVT::v8i16) && + "Unexpected type for custom ctpop lowering"); + + if (VT.getVectorElementType() == MVT::i32) + return lowerCTPOP32BitElements(N, DAG); + else + return lowerCTPOP16BitElements(N, DAG); +} + static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); @@ -3892,6 +4064,36 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, return SDValue(); } +// Check if a VEXT instruction can handle the shuffle mask when the +// vector sources of the shuffle are the same. +static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { + unsigned NumElts = VT.getVectorNumElements(); + + // Assume that the first shuffle index is not UNDEF. Fail if it is. + if (M[0] < 0) + return false; + + Imm = M[0]; + + // If this is a VEXT shuffle, the immediate value is the index of the first + // element. The other shuffle indices must be the successive elements after + // the first one. + unsigned ExpectedElt = Imm; + for (unsigned i = 1; i < NumElts; ++i) { + // Increment the expected index. If it wraps around, just follow it + // back to index zero and keep going. + ++ExpectedElt; + if (ExpectedElt == NumElts) + ExpectedElt = 0; + + if (M[i] < 0) continue; // ignore UNDEF indices + if (ExpectedElt != static_cast<unsigned>(M[i])) + return false; + } + + return true; +} + static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { @@ -4215,9 +4417,26 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // If we are VDUPing a value that comes directly from a vector, that will // cause an unnecessary move to and from a GPR, where instead we could // just use VDUPLANE. - if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) - N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + // We need to create a new undef vector to use for the VDUPLANE if the + // size of the vector from which we get the value is different than the + // size of the vector that we need to create. We will insert the element + // such that the register coalescer will remove unnecessary copies.
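isSingletonVEXTMask above recognizes a shuffle of a vector with itself as a rotation that a single VEXT can perform. The same check as a standalone sketch (isRotationMask is an illustrative name; -1 encodes an UNDEF lane):

    #include <cassert>
    #include <vector>

    // The defined indices must follow M[0] consecutively, wrapping from the
    // last element back to index zero; M[0] then becomes the VEXT immediate.
    static bool isRotationMask(const std::vector<int> &M, unsigned &Imm) {
      if (M.empty() || M[0] < 0)
        return false; // the first index must be defined
      Imm = M[0];
      unsigned Expected = Imm;
      for (size_t i = 1; i < M.size(); ++i) {
        if (++Expected == M.size())
          Expected = 0; // wrap around
        if (M[i] >= 0 && static_cast<unsigned>(M[i]) != Expected)
          return false; // negative entries are UNDEF and ignored
      }
      return true;
    }

    int main() {
      unsigned Imm;
      assert(isRotationMask({2, 3, 0, 1}, Imm) && Imm == 2); // vext #2
      assert(!isRotationMask({2, 0, 1, 3}, Imm));            // not a rotation
      return 0;
    }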
+ if (VT != Value->getOperand(0).getValueType()) { + ConstantSDNode *constIndex; + constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)); + assert(constIndex && "The index is not a constant!"); + unsigned index = constIndex->getAPIntValue().getLimitedValue() % + VT.getVectorNumElements(); + N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), + Value, DAG.getConstant(index, MVT::i32)), + DAG.getConstant(index, MVT::i32)); + } else { + N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, Value->getOperand(0), Value->getOperand(1)); + } + } else N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); @@ -4634,6 +4853,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (isVREVMask(ShuffleMask, VT, 16)) return DAG.getNode(ARMISD::VREV16, dl, VT, V1); + if (V2->getOpcode() == ISD::UNDEF && + isSingletonVEXTMask(ShuffleMask, VT, Imm)) { + return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, + DAG.getConstant(Imm, MVT::i32)); + } + // Check for Neon shuffles that modify both input vectors in place. // If both results are used, i.e., if there are two shuffles with the same // source operands and with masks corresponding to both results of one of @@ -4833,16 +5058,76 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return false; } -/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending -/// load, or BUILD_VECTOR with extended elements, return the unextended value. -static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { +/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total +/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. +/// We insert the required extension here to get the vector to fill a D register. +static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, + const EVT &OrigTy, + const EVT &ExtTy, + unsigned ExtOpcode) { + // The vector originally had a size of OrigTy. It was then extended to ExtTy. + // We expect the ExtTy to be 128-bits total. If the OrigTy is less than + // 64-bits we need to insert a new extension so that it will be 64-bits. + assert(ExtTy.is128BitVector() && "Unexpected extension size"); + if (OrigTy.getSizeInBits() >= 64) + return N; + + // Must extend size to at least 64 bits to be used as an operand for VMULL. + MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; + EVT NewVT; + switch (OrigSimpleTy) { + default: llvm_unreachable("Unexpected Orig Vector Type"); + case MVT::v2i8: + case MVT::v2i16: + NewVT = MVT::v2i32; + break; + case MVT::v4i8: + NewVT = MVT::v4i16; + break; + } + return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); +} + +/// SkipLoadExtensionForVMULL - return a load of the original vector size that +/// does not do any sign/zero extension. If the original vector is less +/// than 64 bits, an appropriate extension will be added after the load to +/// reach a total size of 64 bits. We have to add the extension separately +/// because ARM does not have a sign/zero extending load for vectors. 
+static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { + SDValue NonExtendingLoad = + DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment()); + unsigned ExtOp = 0; + switch (LD->getExtensionType()) { + default: llvm_unreachable("Unexpected LoadExtType"); + case ISD::EXTLOAD: + case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; + } + MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; + MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; + return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, + MemType, ExtType, ExtOp); +} + +/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, +/// extending load, or BUILD_VECTOR with extended elements, return the +/// unextended value. The unextended vector should be 64 bits so that it can +/// be used as an operand to a VMULL instruction. If the original vector size +/// before extension is less than 64 bits we add an extension to resize +/// the vector to 64 bits. +static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) - return N->getOperand(0); + return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, + N->getOperand(0)->getValueType(0), + N->getValueType(0), + N->getOpcode()); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) - return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), - LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment()); + return SkipLoadExtensionForVMULL(LD, DAG); + // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will // have been legalized as a BITCAST from v4i32. if (N->getOpcode() == ISD::BITCAST) { @@ -4897,7 +5182,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Multiplications are only custom-lowered for 128-bit vectors so that // VMULL can be detected. Otherwise v2i64 multiplications are not legal. EVT VT = Op.getValueType(); - assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); + assert(VT.is128BitVector() && VT.isInteger() && + "unexpected type for custom-lowering ISD::MUL"); SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; @@ -4940,9 +5226,9 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Legalize to a VMULL instruction. 
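For context on why the SkipExtensionForVMULL helpers above insist on 64-bit operands: VMULL reads two D registers of narrow elements and writes a Q register of widened products, so per lane it is just a widening multiply. A tiny sketch of one signed 16-bit lane (vmullLaneS16 is an illustrative name):

    #include <cassert>
    #include <cstdint>

    // One lane of vmull.s16: sign-extend both inputs, then multiply at full
    // 32-bit width so no product bits are lost.
    static int32_t vmullLaneS16(int16_t a, int16_t b) {
      return static_cast<int32_t>(a) * static_cast<int32_t>(b);
    }

    int main() {
      assert(vmullLaneS16(-32768, 32767) == -1073709056);
      return 0;
    }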
DebugLoc DL = Op.getDebugLoc(); SDValue Op0; - SDValue Op1 = SkipExtension(N1, DAG); + SDValue Op1 = SkipExtensionForVMULL(N1, DAG); if (!isMLA) { - Op0 = SkipExtension(N0, DAG); + Op0 = SkipExtensionForVMULL(N0, DAG); assert(Op0.getValueType().is64BitVector() && Op1.getValueType().is64BitVector() && "unexpected types for extended operands to VMULL"); @@ -4957,8 +5243,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // vaddl q0, d4, d5 // vmovl q1, d6 // vmul q0, q0, q1 - SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); - SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); + SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); + SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); EVT Op1VT = Op1.getValueType(); return DAG.getNode(N0->getOpcode(), DL, VT, DAG.getNode(NewOpc, DL, VT, @@ -5244,6 +5530,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); + case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); @@ -5304,6 +5591,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_CMP_SWAP: ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); return; + case ISD::ATOMIC_LOAD_MIN: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG); + return; + case ISD::ATOMIC_LOAD_UMIN: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG); + return; + case ISD::ATOMIC_LOAD_MAX: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG); + return; + case ISD::ATOMIC_LOAD_UMAX: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG); + return; } if (Res.getNode()) Results.push_back(Res); @@ -5594,7 +5893,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // ldrex dest, ptr // (sign extend dest, if required) // cmp dest, incr - // cmov.cond scratch2, dest, incr + // cmov.cond scratch2, incr, dest // strex scratch, scratch2, ptr // cmp scratch, #0 // bne- loopMBB @@ -5617,7 +5916,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(oldval).addReg(incr)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) - .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); + .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); if (strOpc == ARM::t2STREX) @@ -5643,7 +5942,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock * ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, unsigned Op1, unsigned Op2, - bool NeedsCarry, bool IsCmpxchg) const { + bool NeedsCarry, bool IsCmpxchg, + bool IsMinMax, ARMCC::CondCodes CC) const { // This also handles ATOMIC_SWAP, indicated by Op1==0. 
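EmitAtomicBinary64 below grows an IsMinMax mode: the loaded pair is compared against the operand, and the loop either exits early (the old value is already the min/max) or attempts the exclusive store of the new value. Behaviorally the emitted loop is roughly this sketch, with compare_exchange_weak standing in for the LDREXD/STREXD retry (atomicMin64Model is a hypothetical name):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Rough model of ATOMMIN6432: keep the old value when it is already
    // <= val (the early branch to the exit block), otherwise try to store
    // val, retrying when the exclusive store fails.
    static int64_t atomicMin64Model(std::atomic<int64_t> &mem, int64_t val) {
      int64_t old = mem.load();
      while (old > val && !mem.compare_exchange_weak(old, val)) {
        // compare_exchange_weak refreshes 'old' on failure; re-check, retry.
      }
      return old;
    }

    int main() {
      std::atomic<int64_t> m(10);
      assert(atomicMin64Model(m, 3) == 10 && m.load() == 3);
      assert(atomicMin64Model(m, 7) == 3 && m.load() == 3);
      return 0;
    }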
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -5672,16 +5972,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; - if (IsCmpxchg) { + if (IsCmpxchg || IsMinMax) contBB = MF->CreateMachineBasicBlock(LLVM_BB); + if (IsCmpxchg) cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); - } MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); - if (IsCmpxchg) { - MF->insert(It, contBB); - MF->insert(It, cont2BB); - } + if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); + if (IsCmpxchg) MF->insert(It, cont2BB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -5717,12 +6016,32 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // for ldrexd must be different. BB = loopMBB; // Load + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned GPRPair2; + if (IsMinMax) { + //We need an extra double register for doing min/max. + unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) + .addReg(undef) + .addReg(vallo) + .addImm(ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2) + .addReg(r1) + .addReg(valhi) + .addImm(ARM::gsub_1); + } + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(ARM::R2, RegState::Define) - .addReg(ARM::R3, RegState::Define).addReg(ptr)); + .addReg(GPRPair0, RegState::Define).addReg(ptr)); // Copy r2/r3 into dest. (This copy will normally be coalesced.) - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); if (IsCmpxchg) { // Add early exit @@ -5741,24 +6060,67 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // Copy to physregs for strexd unsigned setlo = MI->getOperand(5).getReg(); unsigned sethi = MI->getOperand(6).getReg(); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi); + unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) + .addReg(undef) + .addReg(setlo) + .addImm(ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) + .addReg(r1) + .addReg(sethi) + .addImm(ARM::gsub_1); } else if (Op1) { // Perform binary operation - AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0) + unsigned tmpRegLo = MRI.createVirtualRegister(TRC); + AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) .addReg(destlo).addReg(vallo)) .addReg(NeedsCarry ? 
ARM::CPSR : 0, getDefRegState(NeedsCarry)); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1) - .addReg(desthi).addReg(valhi)).addReg(0); + unsigned tmpRegHi = MRI.createVirtualRegister(TRC); + AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) + .addReg(desthi).addReg(valhi)) + .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); + + unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); + unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) + .addReg(UndefPair) + .addReg(tmpRegLo) + .addImm(ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) + .addReg(r1) + .addReg(tmpRegHi) + .addImm(ARM::gsub_1); } else { // Copy to physregs for strexd - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi); + unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) + .addReg(UndefPair) + .addReg(vallo) + .addImm(ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) + .addReg(r1) + .addReg(valhi) + .addImm(ARM::gsub_1); + } + unsigned GPRPairStore = GPRPair1; + if (IsMinMax) { + // Compare and branch to exit block. + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); + BB->addSuccessor(exitMBB); + BB->addSuccessor(contBB); + BB = contBB; + GPRPairStore = GPRPair2; } // Store AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr)); + .addReg(GPRPairStore).addReg(ptr)); // Cmp+jump AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(storesuccess).addImm(0)); @@ -5987,12 +6349,15 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); - if (AFI->isThumb1OnlyFunction()) - BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); - else if (!Subtarget->hasVFP2()) - BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); - else - BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); + MachineInstrBuilder MIB; + MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); + + const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); + const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); + + // Add a register mask with no preserved registers. This results in all + // registers being marked as clobbered. + MIB.addRegMask(RI.getNoPreservedMask()); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { @@ -6064,9 +6429,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. 
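The repeated IMPLICIT_DEF plus two INSERT_SUBREG sequences above assemble a GPRPair operand from two i32 registers. As plain arithmetic that is just packing two halves into one 64-bit value (makeGPRPairModel is an illustrative name):

    #include <cassert>
    #include <cstdint>

    // IMPLICIT_DEF yields an undefined pair; INSERT_SUBREG then fills
    // gsub_0 (the low word) and gsub_1 (the high word).
    static uint64_t makeGPRPairModel(uint32_t lo, uint32_t hi) {
      uint64_t pair = 0; // stands in for the IMPLICIT_DEF placeholder
      pair = (pair & ~uint64_t(0xFFFFFFFF)) | lo;                  // gsub_0
      pair = (pair & uint64_t(0xFFFFFFFF)) | (uint64_t(hi) << 32); // gsub_1
      return pair;
    }

    int main() {
      assert(makeGPRPairModel(0x1111, 0x2222) == 0x0000222200001111ULL);
      return 0;
    }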
- unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getTargetData()->getTypeAllocSize(C->getType()); + Align = getDataLayout()->getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -6153,9 +6518,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. - unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getTargetData()->getTypeAllocSize(C->getType()); + Align = getDataLayout()->getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -6211,8 +6576,6 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // N.B. the order the invoke BBs are processed in doesn't matter here. - const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); - const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; for (SmallPtrSet<MachineBasicBlock*, 64>::iterator @@ -6249,7 +6612,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DefRegs[OI->getReg()] = true; } - MachineInstrBuilder MIB(&*II); + MachineInstrBuilder MIB(*MF, &*II); for (unsigned i = 0; SavedRegs[i] != 0; ++i) { unsigned Reg = SavedRegs[i]; @@ -6326,7 +6689,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && + if (!MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) { ldrOpc = ARM::VLD1q32wb_fixed; @@ -6411,7 +6776,8 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { } else { AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); + .addReg(srcOut, RegState::Define).addReg(srcIn) + .addReg(0).addImm(1)); AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) .addReg(scratch).addReg(destIn) @@ -6474,9 +6840,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. - unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getTargetData()->getTypeAllocSize(C->getType()); + Align = getDataLayout()->getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) @@ -6753,6 +7119,26 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ false, /*IsCmpxchg*/true); + case ARM::ATOMMIN6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::LE); + case ARM::ATOMMAX6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::GE); + case ARM::ATOMUMIN6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::LS); + case ARM::ATOMUMAX6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::HS); case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the @@ -9024,20 +9410,36 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { - if (!Subtarget->allowsUnalignedMem()) - return false; +bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i8: case MVT::i16: - case MVT::i32: - return true; + case MVT::i32: { + // Unaligned access can use (for example) LDRB, LDRH, LDR + if (AllowsUnaligned) { + if (Fast) + *Fast = Subtarget->hasV7Ops(); + return true; + } + return false; + } case MVT::f64: - return Subtarget->hasNEON(); - // FIXME: VLD1 etc with standard alignment is legal. + case MVT::v2f64: { + // For any little-endian targets with NEON, we can support unaligned ld/st + // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. + // A big-endian target may also explicitly support unaligned accesses + if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { + if (Fast) + *Fast = true; + return true; + } + return false; + } } } @@ -9049,33 +9451,59 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); // See if we can use NEON instructions for this... - if (IsZeroVal && - !F->hasFnAttr(Attribute::NoImplicitFloat) && - Subtarget->hasNEON()) { - if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { - return MVT::v4i32; - } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) { - return MVT::v2i32; + if ((!IsMemset || ZeroMemset) && + Subtarget->hasNEON() && + !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat)) { + bool Fast; + if (Size >= 16 && + (memOpAlign(SrcAlign, DstAlign, 16) || + (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) { + return MVT::v2f64; + } else if (Size >= 8 && + (memOpAlign(SrcAlign, DstAlign, 8) || + (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) { + return MVT::f64; + } } // Lowering to i32/i16 if the size permits. - if (Size >= 4) { + if (Size >= 4) return MVT::i32; - } else if (Size >= 2) { + else if (Size >= 2) return MVT::i16; - } // Let the target-independent logic figure it out. 
return MVT::Other; } +bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { + if (Val.getOpcode() != ISD::LOAD) + return false; + + EVT VT1 = Val.getValueType(); + if (!VT1.isSimple() || !VT1.isInteger() || + !VT2.isSimple() || !VT2.isInteger()) + return false; + + switch (VT1.getSimpleVT().SimpleTy) { + default: break; + case MVT::i1: + case MVT::i8: + case MVT::i16: + // 8-bit and 16-bit loads implicitly zero-extend to 32-bits. + return true; + } + + return false; +} + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; @@ -9850,7 +10278,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_neon_vld4lane: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; @@ -9875,7 +10303,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; + NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); @@ -9915,3 +10343,4 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } + diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 2b8f382..9ee17f0 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -17,11 +17,11 @@ #include "ARM.h" #include "ARMSubtarget.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" #include <vector> namespace llvm { @@ -232,7 +232,11 @@ namespace llvm { ATOMAND64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG, - ATOMCMPXCHG64_DAG + ATOMCMPXCHG64_DAG, + ATOMMIN64_DAG, + ATOMUMIN64_DAG, + ATOMMAX64_DAG, + ATOMUMAX64_DAG }; } @@ -248,7 +252,7 @@ namespace llvm { public: explicit ARMTargetLowering(TargetMachine &TM); - virtual unsigned getJumpTableEncoding(void) const; + virtual unsigned getJumpTableEncoding() const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -281,15 +285,19 @@ namespace llvm { bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows - /// unaligned memory accesses. of the specified type. - virtual bool allowsUnalignedMemoryAccesses(EVT VT) const; + /// unaligned memory accesses of the specified type. Returns whether it + /// is "fast" by reference in the second argument. 
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + using TargetLowering::isZExtFree; + virtual bool isZExtFree(SDValue Val, EVT VT2) const; + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; @@ -358,7 +366,7 @@ namespace llvm { /// getRegClassFor - Return the register class that should be used for the /// specified value type. - virtual const TargetRegisterClass *getRegClassFor(EVT VT) const; + virtual const TargetRegisterClass *getRegClassFor(MVT VT) const; /// getMaximalGlobalOffset - Returns the maximal possible offset which can /// be used for loads / stores from the global. @@ -384,7 +392,7 @@ namespace llvm { unsigned Intrinsic) const; protected: std::pair<const TargetRegisterClass*, uint8_t> - findRepresentativeClass(EVT VT) const; + findRepresentativeClass(MVT VT) const; private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can @@ -466,7 +474,11 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, unsigned ArgOffset) + DebugLoc dl, SDValue &Chain, + const Value *OrigArg, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, @@ -477,7 +489,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; /// HandleByVal - Target-specific cleanup for ByVal support. - virtual void HandleByVal(CCState *, unsigned &) const; + virtual void HandleByVal(CCState *, unsigned &, unsigned) const; /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call @@ -491,6 +503,12 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; + + virtual bool CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const; + virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -522,7 +540,9 @@ namespace llvm { unsigned Op1, unsigned Op2, bool NeedsCarry = false, - bool IsCmpxchg = false) const; + bool IsCmpxchg = false, + bool IsMinMax = false, + ARMCC::CondCodes CC = ARMCC::AL) const; MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index c8966fb..67a6820 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -846,6 +846,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, let Inst{3-0} = Rm; } +// Division instructions. 
+class ADivA1I<bits<3> opcod, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin, + opc, asm, "", pattern> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{27-23} = 0b01110; + let Inst{22-20} = opcod; + let Inst{19-16} = Rd; + let Inst{15-12} = 0b1111; + let Inst{11-8} = Rm; + let Inst{7-4} = 0b0001; + let Inst{3-0} = Rn; +} + // PKH instructions def PKHLSLAsmOperand : ImmAsmOperand { let Name = "PKHLSLImm"; @@ -893,6 +910,10 @@ class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> { class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV5TE]; } +// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps. +class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps]; +} class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV6]; } diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 31b0c41..80f0ec7 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -13,13 +13,17 @@ #include "ARMInstrInfo.h" #include "ARM.h" +#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" +#include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; @@ -84,3 +88,61 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } + +namespace { + /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC + /// global base register for ARM ELF. + struct ARMCGBR : public MachineFunctionPass { + static char ID; + ARMCGBR() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (AFI->getGlobalBaseReg() == 0) + return false; + + const ARMTargetMachine *TM = + static_cast<const ARMTargetMachine *>(&MF.getTarget()); + if (TM->getRelocationModel() != Reloc::PIC_) + return false; + + LLVMContext* Context = &MF.getFunction()->getContext(); + GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, + GlobalValue::ExternalLinkage, 0, + "_GLOBAL_OFFSET_TABLE_"); + unsigned Id = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id); + unsigned Align = TM->getDataLayout()->getPrefTypeAlignment(GV->getType()); + unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); + + MachineBasicBlock &FirstMBB = MF.front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc DL = FirstMBB.findDebugLoc(MBBI); + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? 
+ ARM::t2LDRpci : ARM::LDRcp; + const TargetInstrInfo &TII = *TM->getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, + TII.get(Opc), GlobalBaseReg) + .addConstantPoolIndex(Idx); + if (Opc == ARM::LDRcp) + MIB.addImm(0); + AddDefaultPred(MIB); + + return true; + } + + virtual const char *getPassName() const { + return "ARM PIC Global Base Reg Initialization"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char ARMCGBR::ID = 0; +FunctionPass* +llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index e23989e..12712c0 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -207,6 +207,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate<"FeatureHWDiv", "divide">; +def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, + AssemblerPredicate<"FeatureHWDivARM">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, AssemblerPredicate<"FeatureT2XtPk", "pack/extract">; @@ -242,6 +244,7 @@ def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +def UseMulOps : Predicate<"Subtarget->useMulOps()">; // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. // But only select them if more precision in FP computation is allowed. @@ -252,6 +255,20 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; +// VGETLNi32 is microcoded on Swift - prefer VMOV. +def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">; +def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">; + +// VDUP.32 is microcoded on Swift - prefer VMOV. +def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">; +def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; + +// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as +// this allows more effective execution domain optimization. See +// setExecutionDomain(). +def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; +def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; + def IsLE : Predicate<"TLI.isLittleEndian()">; def IsBE : Predicate<"TLI.isBigEndian()">; @@ -266,15 +283,13 @@ class RegConstraint<string C> { // ARM specific transformation functions and pattern fragments. // -// imm_neg_XFORM - Return a imm value packed into the format described for -// imm_neg defs below. +// imm_neg_XFORM - Return the negation of an i32 immediate value. def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); }]>; -// so_imm_not_XFORM - Return a so_imm value packed into the format described for -// so_imm_not def below. -def so_imm_not_XFORM : SDNodeXForm<imm, [{ +// imm_not_XFORM - Return the complement of an i32 immediate value. 
+def imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32); }]>; @@ -297,7 +312,7 @@ def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } def so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; - }], so_imm_not_XFORM> { + }], imm_not_XFORM> { let ParserMatchClass = so_imm_not_asmoperand; } @@ -402,6 +417,8 @@ def reglist : Operand<i32> { let DecoderMethod = "DecodeRegListOperand"; } +def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">; + def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; } def dpr_reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; @@ -1607,6 +1624,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, GPR:$set1, GPR:$set2), NoItinerary, []>; +def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; } def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary, @@ -3092,15 +3121,19 @@ def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm), (SUBSri GPR:$src, so_imm_neg:$imm)>; def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm), - (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; + (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), - (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; + (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
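The identity these carry-in patterns rely on: for 32-bit values, -imm == ~imm + 1, so a + ~imm + carry == a - imm - (1 - carry), which is exactly the borrow semantics SBC implements. A minimal standalone C++ check of the identity (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t a = 1000, imm = 37;
  // carry set:   a + ~imm + 1 == a - imm
  // carry clear: a + ~imm     == a - imm - 1
  for (uint32_t carry = 0; carry <= 1; ++carry)
    assert(a + ~imm + carry == a - imm - (1 - carry));
  return 0;
}

The SBCri/SBCrr patterns below apply this, with so_imm_not / imm_not_XFORM supplying the complemented immediate.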
def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR), (SBCri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR), + (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -3444,13 +3477,13 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, 4, IIC_iMUL32, [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6, UseMulOps]>; } def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6, UseMulOps]> { bits<4> Ra; let Inst{15-12} = Ra; } @@ -3466,7 +3499,7 @@ def MLAv5: ARMPseudoExpand<(outs GPR:$Rd), def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, - Requires<[IsARM, HasV6T2]> { + Requires<[IsARM, HasV6T2, UseMulOps]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -3572,7 +3605,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6, UseMulOps]>; def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), @@ -3582,7 +3615,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6, UseMulOps]>; def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), @@ -3636,7 +3669,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3644,7 +3677,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3652,7 +3685,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3660,7 +3693,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3668,7 +3701,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, 
(sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3676,7 +3709,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; } } @@ -3779,6 +3812,19 @@ defm SMUA : AI_sdml<0, "smua">; defm SMUS : AI_sdml<1, "smus">; //===----------------------------------------------------------------------===// +// Division Instructions (ARMv7-A with virtualization extension) +// +def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "sdiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>; + +def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "udiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>; + +//===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. // @@ -4197,8 +4243,8 @@ def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, "ldrex", "\t$Rt, $addr", []>; let hasExtraDefRegAllocReq = 1 in -def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr), - NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> { +def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), + NoItinerary, "ldrexd", "\t$Rt, $addr", []> { let DecoderMethod = "DecodeDoubleRegLoad"; } } @@ -4212,8 +4258,8 @@ def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), - (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr), - NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> { + (ins GPRPairOp:$Rt, addr_offset_none:$addr), + NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> { let DecoderMethod = "DecodeDoubleRegStore"; } } @@ -4711,21 +4757,13 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), Requires<[IsARM, IsIOS]>; } -// eh.sjlj.dispatchsetup pseudo-instructions. -// These pseudos are used for both ARM and Thumb2. Any differences are -// handled when the pseudo is expanded (which happens before any passes -// that need the instruction size). -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], - isBarrier = 1 in +// eh.sjlj.dispatchsetup pseudo-instruction. +// This pseudo is used for both ARM and Thumb. Any differences are handled when +// the pseudo is expanded (which happens before any passes that need the +// instruction size). 
+let isBarrier = 1 in def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], - isBarrier = 1 in -def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>; - //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -4829,32 +4867,32 @@ def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8158a11..697a8d2 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -398,6 +398,20 @@ def VecListFourQWordIndexed : Operand<i32> { let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 8; +}]>; +def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 8; +}]>; +def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() == 4; +}]>; +def word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() == 4; +}]>; def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() == 2; }]>; @@ -2273,6 +2287,25 @@ def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; +// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 +// load / store if it's legal. 
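One way to read the pattern group below: the access alignment selects the VLD1/VST1 element size, so 8-byte-aligned v2f64 accesses use VLD1q64/VST1q64, 4-byte-aligned ones use VLD1q32/VST1q32, and so on down to bytes. The 16-bit and 8-bit element forms carry Requires<[IsLE]>, presumably because on big-endian NEON the element size determines the per-element byte swapping, so a mismatched element size would change the in-register layout.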
+def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), + (VLD1q64 addrmode6:$addr)>; +def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q64 addrmode6:$addr, QPR:$value)>; +def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), + (VLD1q32 addrmode6:$addr)>; +def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q32 addrmode6:$addr, QPR:$value)>; +def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), + (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; +def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), + (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; + //===----------------------------------------------------------------------===// // NEON pattern fragments //===----------------------------------------------------------------------===// @@ -4455,10 +4488,36 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), + (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), + (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), + (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; + +def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), @@ -4467,9 +4526,35 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), + (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), + (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), + (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; + def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : 
Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -4792,12 +4877,15 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>; -def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs", "f32", - v2f32, v2f32, int_arm_neon_vabs>; -def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAQ, "vabs", "f32", - v4f32, v4f32, int_arm_neon_vabs>; +def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v2f32, v2f32, fabs>; +def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v4f32, v4f32, fabs>; + +def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; +def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, @@ -4983,7 +5071,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), - imm:$lane))]> { + imm:$lane))]>, + Requires<[HasNEON, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -5006,7 +5095,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane))>; + (SubReg_i32_lane imm:$lane))>, + Requires<[HasNEON, HasFastVGETLNi32]>; +def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; @@ -5045,23 +5143,25 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), GPR:$R, imm:$lane))]> { let Inst{21} = lane{0}; } - -def VSETLNi8Q : PseudoNeonI<(outs QPR:$V), - (ins QPR:$src1, GPR:$R, VectorIndex8:$lane), - IIC_VMOVISL, "", - [(set QPR:$V, (vector_insert (v16i8 QPR:$src1), - GPR:$R, imm:$lane))]>; -def VSETLNi16Q : PseudoNeonI<(outs QPR:$V), - (ins QPR:$src1, GPR:$R, VectorIndex16:$lane), - IIC_VMOVISL, "", - [(set QPR:$V, (vector_insert (v8i16 QPR:$src1), - GPR:$R, imm:$lane))]>; } - +def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), + (v16i8 (INSERT_SUBREG QPR:$src1, + (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i8_reg imm:$lane))), + GPR:$src2, (SubReg_i8_lane imm:$lane))), + (DSubReg_i8_reg imm:$lane)))>; +def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), + (v8i16 (INSERT_SUBREG QPR:$src1, + (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i16_reg imm:$lane))), + GPR:$src2, (SubReg_i16_lane imm:$lane))), + (DSubReg_i16_reg imm:$lane)))>; def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, - GPR:$src2, - (SSubReg_f32_reg imm:$lane)))>; + (v4i32 (INSERT_SUBREG 
QPR:$src1, + (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i32_reg imm:$lane))), + GPR:$src2, (SubReg_i32_lane imm:$lane))), + (DSubReg_i32_reg imm:$lane)))>; def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), @@ -5115,14 +5215,23 @@ class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, + Requires<[HasNEON, HasFastVDUP32]>; def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +// NEONvdup patterns for uarchs with fast VDUP.32. +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, + Requires<[HasNEON,HasFastVDUP32]>; def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; +// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. +def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; + // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, @@ -5559,6 +5668,11 @@ def : N2VSPat<arm_ftoui, VCVTf2ud>; def : N2VSPat<arm_sitof, VCVTs2fd>; def : N2VSPat<arm_uitof, VCVTu2fd>; +// Prefer VMOVDRR for i32 -> f32 bitcasts; it can write all DPR registers. +def : Pat<(f32 (bitconvert GPR:$a)), + (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, + Requires<[HasNEON, DontUseVMOVSR]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index e171f8b..ae7a5c0 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -223,6 +223,7 @@ def t_addrmode_sp : Operand<i32>, def t_addrmode_pc : Operand<i32> { let EncoderMethod = "getAddrModePCOpValue"; let DecoderMethod = "DecodeThumbAddrModePC"; + let PrintMethod = "printThumbLdrLabelOperand"; } //===----------------------------------------------------------------------===// @@ -1246,10 +1247,6 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, Requires<[IsThumb, IsIOS]>; -let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ], - isBarrier = 1 in -def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; - //===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index f1a6cce..cf8b302 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -130,8 +130,9 @@ def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{ let ParserMatchClass = imm0_4095_neg_asmoperand; } -def imm0_255_neg : PatLeaf<(i32 imm), [{ - return (uint32_t)(-N->getZExtValue()) < 255; +def imm1_255_neg : PatLeaf<(i32 imm), [{ + uint32_t Val = -N->getZExtValue(); + return (Val > 0 && Val < 255); }], imm_neg_XFORM>; def imm0_255_not :
PatLeaf<(i32 imm), [{ @@ -159,7 +160,7 @@ def t2addrmode_imm12 : Operand<i32>, // t2ldrlabel := imm12 def t2ldrlabel : Operand<i32> { let EncoderMethod = "getAddrModeImm12OpValue"; - let PrintMethod = "printT2LdrLabelOperand"; + let PrintMethod = "printThumbLdrLabelOperand"; } def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";} @@ -1928,8 +1929,8 @@ defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; // The AddedComplexity prefers the first variant over the others since // it can be shrunk to a 16-bit wide encoding, while the others cannot. let AddedComplexity = 1 in -def : T2Pat<(add GPR:$src, imm0_255_neg:$imm), - (t2SUBri GPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(add GPR:$src, imm1_255_neg:$imm), + (t2SUBri GPR:$src, imm1_255_neg:$imm)>; def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), @@ -1938,8 +1939,8 @@ def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm), (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; let AddedComplexity = 1 in -def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm), - (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(ARMaddc rGPR:$src, imm1_255_neg:$imm), + (t2SUBSri rGPR:$src, imm1_255_neg:$imm)>; def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm), (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm), @@ -1953,7 +1954,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR), def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), - (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; + (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>; // Select Bytes -- for disassembly only @@ -2314,13 +2315,15 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn", /// changed to modify CPSR.
multiclass T2I_un_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { + PatFrag opnode, + bit Cheap = 0, bit ReMat = 0, bit MoveImm = 0> { // shifted imm def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii, opc, "\t$Rd, $imm", [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; + let isMoveImm = MoveImm; let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -2354,7 +2357,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, let AddedComplexity = 1 in defm t2MVN : T2I_un_irs <0b0011, "mvn", IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi, - UnOpFrag<(not node:$Src)>, 1, 1>; + UnOpFrag<(not node:$Src)>, 1, 1, 1>; let AddedComplexity = 1 in def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), @@ -2396,7 +2399,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, def t2MLA: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> { + [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>, + Requires<[IsThumb2, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2406,7 +2410,8 @@ def t2MLA: T2FourReg< def t2MLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> { + [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>, + Requires<[IsThumb2, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2475,7 +2480,7 @@ def t2SMMLA : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2496,7 +2501,7 @@ def t2SMMLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2601,7 +2606,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2614,7 +2619,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2627,7 +2632,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let 
Inst{22-20} = 0b001; @@ -2640,7 +2645,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2653,7 +2658,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2666,7 +2671,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, (sra rGPR:$Rm, (i32 16))), (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2760,7 +2765,7 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), // Division Instructions. // Signed and unsigned division on v7-M // -def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, +def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, Requires<[HasDivide, IsThumb2]> { @@ -2771,7 +2776,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, let Inst{7-4} = 0b1111; } -def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, +def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, Requires<[HasDivide, IsThumb2]> { @@ -3243,11 +3248,11 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{15-14} = 0b10; let Inst{12} = 1; - bits<20> target; + bits<24> target; let Inst{26} = target{19}; let Inst{11} = target{18}; let Inst{13} = target{17}; - let Inst{21-16} = target{16-11}; + let Inst{25-16} = target{20-11}; let Inst{10-0} = target{10-0}; let DecoderMethod = "DecodeT2BInstruction"; } @@ -3329,20 +3334,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { Requires<[IsThumb2, IsIOS]>; } -let isCall = 1, Defs = [LR], Uses = [SP] in { - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. - def t2BMOVPCB_CALL : tPseudoInst<(outs), - (ins t_bltarget:$func), - 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb]>; -} - -// Direct calls -def : T2Pat<(ARMcall_nolink texternalsym:$func), - (t2BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsThumb]>; - // IT block let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 7d6692f..b5a896c 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -523,10 +523,12 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010, let D = VFPNeonDomain; } +// Bitcast i32 -> f32. NEON prefers to use VMOVDRR. 
def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", "\t$Sn, $Rt", - [(set SPR:$Sn, (bitconvert GPR:$Rt))]> { + [(set SPR:$Sn, (bitconvert GPR:$Rt))]>, + Requires<[HasVFP2, UseVMOVSR]> { // Instruction operands. bits<5> Sn; bits<4> Rt; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 2dc49a9..a5642d6 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -17,13 +17,12 @@ #include "ARMConstantPoolValue.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" -#include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Function.h" #include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" #include <cstdlib> using namespace llvm; diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index 7928184..23a6a9b 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -15,12 +15,12 @@ #define ARMJITINFO_H #include "ARMMachineFunctionInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetJITInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" namespace llvm { class ARMTargetMachine; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 897ceb6..a1c21ee 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -18,8 +18,12 @@ #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -27,19 +31,15 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumLDMGened , "Number of ldm instructions generated"); @@ -1408,7 +1408,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); PrevMI->setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); - PrevMI->copyImplicitOps(&*MBBI); + PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); MBB.erase(MBBI); return true; } @@ -1448,7 +1448,7 @@ 
namespace { static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} - const TargetData *TD; + const DataLayout *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; @@ -1478,7 +1478,7 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getTargetData(); + TD = Fn.getTarget().getDataLayout(); TII = Fn.getTarget().getInstrInfo(); TRI = Fn.getTarget().getRegisterInfo(); STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index e2ac9a4..b641483 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -15,8 +15,8 @@ #include "ARM.h" #include "ARMAsmPrinter.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/Constants.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index f1c8fc8..88d96c0 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -15,10 +15,10 @@ #define ARMMACHINEFUNCTIONINFO_H #include "ARMSubtarget.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/BitVector.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { @@ -108,6 +108,11 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// pass. DenseMap<unsigned, unsigned> CPEClones; + /// GlobalBaseReg - keeps track of the virtual register initialized for + /// use as the global base register. This is used for PIC in some PIC + /// relocation models. 
+ unsigned GlobalBaseReg; + public: ARMFunctionInfo() : isThumb(false), @@ -119,7 +124,7 @@ public: GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false) {} + VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -130,7 +135,7 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false) {} + VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -249,6 +254,9 @@ public: bool hasITBlocks() const { return HasITBlocks; } void setHasITBlocks(bool h) { HasITBlocks = h; } + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) assert(0 && "Duplicate entries!"); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 6f974fd..b0f576b 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -49,6 +49,9 @@ def ssub_0 : SubRegIndex; def ssub_1 : SubRegIndex; def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>; def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>; + +def gsub_0 : SubRegIndex; +def gsub_1 : SubRegIndex; // Let TableGen synthesize the remaining 12 ssub_* indices. // We don't need to name them. } @@ -247,11 +250,16 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { } // Scalar single precision floating point register class.. -def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>; +// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to +// avoid partial-write dependencies on D registers (S registers are +// renamed as portions of D registers). +def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate + (sequence "S%u", 0, 31), 2), + (sequence "S%u", 0, 31))>; // Subset of SPR which can be used as a source of NEON scalars for 16-bit // operations -def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>; +def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>; // Scalar double precision floating point / generic 64-bit vector register // class. @@ -308,6 +316,17 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], let AltOrderSelect = [{ return 1; }]; } +// Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP. +// These are needed by instructions (e.g. ldrexd/strexd) requiring even-odd GPRs. +def Tuples2R : RegisterTuples<[gsub_0, gsub_1], + [(add R0, R2, R4, R6, R8, R10, R12), + (add R1, R3, R5, R7, R9, R11, SP)]>; + +// Register class representing a pair of even-odd GPRs. +def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> { + let Size = 64; // 2 x 32 bits, we have no predefined type of that size. +} + // Pseudo-registers representing 3 consecutive D registers. 
def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], [(shl DPR, 0), diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 81d2fa3..02196d0 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -55,6 +55,7 @@ def IIC_iMUL32 : InstrItinClass; def IIC_iMAC32 : InstrItinClass; def IIC_iMUL64 : InstrItinClass; def IIC_iMAC64 : InstrItinClass; +def IIC_iDIV : InstrItinClass; def IIC_iLoad_i : InstrItinClass; def IIC_iLoad_r : InstrItinClass; def IIC_iLoad_si : InstrItinClass; @@ -261,3 +262,4 @@ def IIC_VTBX4 : InstrItinClass; include "ARMScheduleV6.td" include "ARMScheduleA8.td" include "ARMScheduleA9.td" +include "ARMScheduleSwift.td" diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 7bc590f..404634f 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1876,8 +1876,9 @@ def CortexA9Itineraries : ProcessorItineraries< ]>; // ===---------------------------------------------------------------------===// -// This following definitions describe the simple machine model which -// will replace itineraries. +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler and will eventually replace itineraries. + // Cortex-A9 machine model for scheduling and other instruction cost heuristics. def CortexA9Model : SchedMachineModel { @@ -1891,5 +1892,595 @@ def CortexA9Model : SchedMachineModel { let Itineraries = CortexA9Itineraries; } -// TODO: Add Cortex-A9 processor and scheduler resources. +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +def A9UnitALU : ProcResource<2>; +def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } +def A9UnitAGU : ProcResource<1>; +def A9UnitLS : ProcResource<1>; +def A9UnitFP : ProcResource<1> { let Buffered = 0; } +def A9UnitB : ProcResource<1>; + +//===----------------------------------------------------------------------===// +// Define scheduler read/write types with their resources and latency on A9. + +// Consume an issue slot, but no processor resources. This is useful when all +// other writes associated with the operand have NumMicroOps = 0. +def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; } + +// Write an integer register. +def A9WriteI : SchedWriteRes<[A9UnitALU]>; +// Write an integer shifted-by register +def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } + +// Basic ALU. +def A9WriteA : SchedWriteRes<[A9UnitALU]>; +// ALU with operand shifted by immediate. +def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } +// ALU with operand shifted by register. +def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; } + +// Multiplication +def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; } +def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5; + let NumMicroOps = 0; } +def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; } +def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4; + let NumMicroOps = 0; } + +// Floating-point +// Only one FP or AGU instruction may issue per cycle. We model this +// by having FP instructions consume the AGU resource. 
+def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; } +def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; } +def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; } +def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; } +def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; } +def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } +def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; } +def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; } +def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; } +def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; } + +// NEON has an odd mix of latencies. Simply name the write types by latency. +def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; } +def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; } +def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; } +def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; } +def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; } +def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; } +def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; } +def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } +def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; } + +// Reserve A9UnitFP for 2 consecutive cycles. +def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { + let Latency = 4; + let ResourceCycles = [2]; +} +def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { + let Latency = 7; + let ResourceCycles = [2]; +} +def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { + let Latency = 9; + let ResourceCycles = [2]; +} + +// Branches don't have a def operand but still consume resources. +def A9WriteB : SchedWriteRes<[A9UnitB]>; + +// Address generation. +def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; } + +// Load Integer. +def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; } +// Load the upper 32 bits using the same micro-op. +def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3; + let NumMicroOps = 0; } +// Offset shifted by register. +def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; } +// Load (and zero extend) a byte. +def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; } +def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; } + +// Load or Store Float, aligned. +def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; } + +// Store Integer. +def A9WriteS : SchedWriteRes<[A9UnitLS]>; + +//===----------------------------------------------------------------------===// +// Define resources dynamically for load multiple variants. + +// Define helpers for extra latency without consuming resources. +def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; } +foreach NumCycles = 2-8 in { +def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>; +} // foreach NumCycles + +// Define TII for use in SchedVariant Predicates. +def : PredicateProlog<[{ + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); + (void)TII; +}]>; + +// Define address generation sequences and predicates for 8 flavors of LDMs.
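To make these predicates concrete: each SchedPredicate string becomes the body of a generated C++ check, with the PredicateProlog above pasted in front so TII is in scope. A rough sketch of the shape of that check for A9LMAdr3Pred (the function name and signature here are illustrative, not the verbatim TableGen output):

static bool checkA9LMAdr3Pred(const MachineInstr *MI,
                              const TargetSchedModel *SchedModel) {
  // PredicateProlog: expose the ARM TII to the predicate body.
  const ARMBaseInstrInfo *TII =
      static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
  return TII->getNumLDMAddresses(MI) == 3;
}

The foreach below instantiates the A9WriteAdr1-8 sequences and one such predicate per address count.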
+foreach NumAddr = 1-8 in { + +// Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive +// latency for instructions that generate multiple loads or stores. +def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>; + +// Define a predicate to select the LDM based on number of memory addresses. +def A9LMAdr#NumAddr#Pred : + SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>; + +} // foreach NumAddr + +// Fall-back for unknown LDMs. +def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">; + +// LDM/VLDM/VLDn address generation latency & resources. +// Dynamically select the A9WriteAdrN sequence using a predicate. +def A9WriteLMAdr : SchedWriteVariant<[ + SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>, + SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>, + SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>, + SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>, + SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>, + SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>, + SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>, + SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>, + // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers. + SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>; + +// Define LDM Resources. +// These take no issue resource, so they can be combined with other +// writes like WriteB. +// A9WriteLMLo takes a single LS resource and 2 cycles. +def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2; + let NumMicroOps = 0; } +// Assuming aligned access, the upper half of each pair is free with +// the same latency. +def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2; + let NumMicroOps = 0; } +// Each A9WriteL#N variant adds N cycles of latency without consuming +// additional resources. +foreach NumAddr = 1-8 in { +def A9WriteL#NumAddr : WriteSequence< + [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; +def A9WriteL#NumAddr#Hi : WriteSequence< + [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; +} + +//===----------------------------------------------------------------------===// +// LDM: Load multiple into 32-bit integer registers. + +// A9WriteLM variants expand into a pair of writes for each 64-bit +// value loaded. When the number of registers is odd, the last +// A9WriteLnHi is naturally ignored because the instruction has no +// following def operands. These variants take no issue resource, so +// they may need to be part of a WriteSequence that includes A9WriteIssue. 
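Worked example: an LDM that loads five registers (so, assuming getNumLDMAddresses counts 32-bit words, five addresses) matches A9LMAdr5Pred, and its five defs bind in order to the leading writes of that variant - A9WriteL1, A9WriteL1Hi, A9WriteL2, A9WriteL2Hi, A9WriteL3 - giving latencies 3, 3, 4, 4, 5 cycles (the 2-cycle A9WriteLMLo/A9WriteLMHi plus one extra cycle per 64-bit access); trailing writes with no matching def operand are simply dropped.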
+def A9WriteLM : SchedWriteVariant<[ + SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>, + SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi]>, + SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi]>, + SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi, + A9WriteL4, A9WriteL4Hi]>, + SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi, + A9WriteL4, A9WriteL4Hi, + A9WriteL5, A9WriteL5Hi]>, + SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi, + A9WriteL4, A9WriteL4Hi, + A9WriteL5, A9WriteL5Hi, + A9WriteL6, A9WriteL6Hi]>, + SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi, + A9WriteL4, A9WriteL4Hi, + A9WriteL5, A9WriteL5Hi, + A9WriteL6, A9WriteL6Hi, + A9WriteL7, A9WriteL7Hi]>, + SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi, + A9WriteL4, A9WriteL4Hi, + A9WriteL5, A9WriteL5Hi, + A9WriteL6, A9WriteL6Hi, + A9WriteL7, A9WriteL7Hi, + A9WriteL8, A9WriteL8Hi]>, + // For unknown LDMs, define the maximum number of writes, but only + // make the first two consume resources. + SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3Hi, A9WriteL3Hi, + A9WriteL4Hi, A9WriteL4Hi, + A9WriteL5Hi, A9WriteL5Hi, + A9WriteL6Hi, A9WriteL6Hi, + A9WriteL7Hi, A9WriteL7Hi, + A9WriteL8Hi, A9WriteL8Hi]>]> { + let Variadic = 1; +} + +//===----------------------------------------------------------------------===// +// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support. + +// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources, +// so it can be used in WriteSequences for single-issue instructions that +// encapsulate multiple loads. +def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { + let Latency = 1; + let NumMicroOps = 0; +} + +foreach NumAddr = 1-8 in { + +// Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops. +def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>; + +// A9WriteLfp1-8 definitions are statically expanded into a sequence of +// A9WriteLfpOps with additive latency that takes a single issue slot. +// Used directly to describe NEON VLDn. +def A9WriteLfp#NumAddr : WriteSequence< + [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>; + +// A9WriteLfp1-8Mov adds a cycle of latency and FP resource for +// permuting loaded values. +def A9WriteLfp#NumAddr#Mov : WriteSequence< + [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>; + +} // foreach NumAddr + +// Define VLDM/VSTM PreRA resources. +// A9WriteLMfpPreRA is dynamically expanded into the correct +// A9WriteLfp1-8 sequence based on a predicate. This supports the +// preRA VLDM variants in which all 64-bit loads are written to the +// same tuple of either single or double precision registers. +def A9WriteLMfpPreRA : SchedWriteVariant<[ + SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>, + SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>, + SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>, + SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>, + SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>, + SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>, + SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>, + SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>, + // For unknown VLDM/VSTM PreRA, assume 2xS registers. + SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>; + +// Define VLDM/VSTM PostRA Resources.
+// A9WriteLMfpLo takes an LS and FP resource and one issue slot but no latency. +def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; } + +foreach NumAddr = 1-8 in { + +// Each A9WriteLMfp#N variant adds N cycles of latency without consuming +// additional resources. +def A9WriteLMfp#NumAddr : WriteSequence< + [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; + +// Assuming aligned access, the upper half of each pair is free with +// the same latency. +def A9WriteLMfp#NumAddr#Hi : WriteSequence< + [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; + +} // foreach NumAddr + +// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a +// pair of writes for each 64-bit value loaded. When the number of +// registers is odd, the last A9WriteLMfpNHi is naturally ignored because +// the instruction has no following def operands. +def A9WriteLMfpPostRA : SchedWriteVariant<[ + SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>, + SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi]>, + SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi]>, + SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi]>, + SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi, + A9WriteLMfp5, A9WriteLMfp5Hi]>, + SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi, + A9WriteLMfp5, A9WriteLMfp5Hi, + A9WriteLMfp6, A9WriteLMfp6Hi]>, + SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi, + A9WriteLMfp5, A9WriteLMfp5Hi, + A9WriteLMfp6, A9WriteLMfp6Hi, + A9WriteLMfp7, A9WriteLMfp7Hi]>, + SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi, + A9WriteLMfp5, A9WriteLMfp5Hi, + A9WriteLMfp6, A9WriteLMfp6Hi, + A9WriteLMfp7, A9WriteLMfp7Hi, + A9WriteLMfp8, A9WriteLMfp8Hi]>, + // For unknown LDMs, define the maximum number of writes, but only + // make the first two consume resources. + SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3Hi, A9WriteLMfp3Hi, + A9WriteLMfp4Hi, A9WriteLMfp4Hi, + A9WriteLMfp5Hi, A9WriteLMfp5Hi, + A9WriteLMfp6Hi, A9WriteLMfp6Hi, + A9WriteLMfp7Hi, A9WriteLMfp7Hi, + A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> { + let Variadic = 1; +} + +// Distinguish between our multiple MI-level forms of the same +// VLDM/VSTM instructions. +def A9PreRA : SchedPredicate< + "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">; +def A9PostRA : SchedPredicate< + "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">; + +// VLDM represents all destination registers as a single register +// tuple, unlike LDM. So the number of write operands is not variadic. +def A9WriteLMfp : SchedWriteVariant<[ + SchedVar<A9PreRA, [A9WriteLMfpPreRA]>, + SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>; + +//===----------------------------------------------------------------------===// +// Resources for other (non LDM/VLDM) Variants. + +// These mov immediate writers are unconditionally expanded with +// additive latency.
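For concreteness, these sequences simply sum their components' latencies (assuming the default 1-cycle latency of a plain SchedWriteRes): A9WriteI2 below models a two-instruction immediate move (e.g. movw/movt) as 2 cycles and 2 micro-ops, A9WriteI2pc adds a 1-cycle ALU op for the PC add (3 cycles), and A9WriteI2ld chains a 3-cycle load after the pair (5 cycles end to end).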
+def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>; +def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>; +def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>; + +// Some ALU operations can read loaded integer values one cycle early. +def A9ReadA : SchedReadAdvance<1, + [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi, + A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4, + A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8, + A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi, + A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>; + +// Read types for operands that are unconditionally read in cycle N +// after the instruction issues; these decrease producer latency by N-1. +def A9Read2 : SchedReadAdvance<1>; +def A9Read3 : SchedReadAdvance<2>; +def A9Read4 : SchedReadAdvance<3>; + +//===----------------------------------------------------------------------===// +// Map itinerary classes to scheduler read/write resources per operand. +// +// For ARM, we piggyback scheduler resources on the Itinerary classes +// to avoid perturbing the existing instruction definitions. + +// This table follows the ARM Cortex-A9 Technical Reference Manuals, +// mostly in order. +let SchedModel = CortexA9Model in { + +def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, + IIC_iMVNi,IIC_iMVNsi, + IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>; +def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>; +def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>; + +def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>; +def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>; +def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>; + +def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>; +def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; +def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>; +def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>; +def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>; +def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB +def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>; +def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>; + +// A9WriteMHi is ignored for MUL32. +def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32, + IIC_iMUL64,IIC_iMAC64]>; +// FIXME: SMLALxx needs itin classes +def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>; + +// TODO: For floating-point ops, we model the pipeline forwarding +// latencies here. WAW latencies are sometimes longer. + +def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI, + IIC_fpUNA32, IIC_fpUNA64, + IIC_fpCMP32, IIC_fpCMP64]>; +def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>; +def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS, + IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI, + IIC_fpALU32, IIC_fpALU64]>; +def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>; +def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>; +def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>; +def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>; +def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>; +def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>; +def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>; +def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>; + +def :ItinRW<[A9WriteB], [IIC_Br]>; + +// A9 PLD is processed in a dedicated unit. +def :ItinRW<[], [IIC_Preload]>; + +// Note: We must assume that loads are aligned, since the machine +// model cannot know this statically and A9 ignores alignment hints.
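As a worked example of the read-advance arithmetic above: a consumer operand tagged A9ReadA sees a producer's A9WriteL latency of 3 reduced to 3 - 1 = 2 cycles, and A9Read4 (SchedReadAdvance<3>) would turn a 4-cycle producer into an effective 1-cycle dependence.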
+
+// A9WriteAdr consumes the AGU regardless of address writeback, but its
+// latency is only relevant to users of an updated address.
+def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
+                                     IIC_iLoad_iu,IIC_iLoad_ru]>;
+def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
+def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
+                                       IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
+def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
+def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
+                                                 IIC_iLoad_d_ru]>;
+// Store either has no def operands, or the one def for address writeback.
+def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
+                                     IIC_iStore_iu, IIC_iStore_ru,
+                                     IIC_iStore_d_i, IIC_iStore_d_r,
+                                     IIC_iStore_d_ru]>;
+def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
+                                      IIC_iStore_bh_i, IIC_iStore_bh_r,
+                                      IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
+def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
+
+// A9WriteLM will be expanded into a separate write for each def
+// operand. Address generation consumes resources, but A9WriteLMAdr
+// is listed after all def operands, so it has no effective latency.
+//
+// Note: A9WriteLM expands into an even number of def operands. The
+// actual number of def operands may be less by one.
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
+
+// Load multiple with address writeback has an extra def operand in
+// front of the loaded registers.
+//
+// Reuse the load-multiple variants for store-multiple because the
+// resources are identical. For stores, only the address writeback
+// has a def operand, so the WriteL latencies are unused.
+def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
+                                                      IIC_iStore_m,
+                                                      IIC_iStore_mu]>;
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>;
+
+def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
+
+def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
+def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
+def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
+                                        IIC_fpStore_m, IIC_fpStore_mu]>;
+
+// Note: Unlike VLDM, VLD1 expects the writeback operand after the
+// normal writes.
+def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
+                                         IIC_VLD1x2, IIC_VLD1x2u]>;
+def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
+                                         IIC_VLD1x4, IIC_VLD1x4u,
+                                         IIC_VLD4dup, IIC_VLD4dupu]>;
+def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
+                                            IIC_VLD2, IIC_VLD2u,
+                                            IIC_VLD2dup, IIC_VLD2dupu]>;
+def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
+                                            IIC_VLD2x2, IIC_VLD2x2u,
+                                            IIC_VLD2ln, IIC_VLD2lnu]>;
+def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
+                                            IIC_VLD3dup, IIC_VLD3dupu]>;
+def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
+                                            IIC_VLD4ln, IIC_VLD4lnu]>;
+def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
+
+// Vector stores use similar resources to vector loads, so use the
+// same write types. The address write must be first for stores with
+// address writeback.
+def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u, + IIC_VST1x2, IIC_VST1x2u, + IIC_VST1ln, IIC_VST1lnu, + IIC_VST2, IIC_VST2u, + IIC_VST2x2, IIC_VST2x2u, + IIC_VST2ln, IIC_VST2lnu]>; +def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u, + IIC_VST1x4, IIC_VST1x4u, + IIC_VST3, IIC_VST3u, + IIC_VST3ln, IIC_VST3lnu, + IIC_VST4, IIC_VST4u, + IIC_VST4ln, IIC_VST4lnu]>; + +// NEON moves. +def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>; +def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>; +def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>; + +// NEON integer arithmetic +// +// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL +def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>; +// VSUB/VMVN/VCLSD/VCLZD/VCNTD +def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>; +// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi. +// ... +// VHADD/VRHADD/VQADD/VTST/VADH/VRADH +def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>; +// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL +def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>; +// VQNEG/VQABS +def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>; +// VABS +def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>; +// VPADD/VPADDL are mapped later under IIC_SHLi. +// ... +// VCLSQ/VCLZQ/VCNTQ, takes two cycles. +def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>; +// VMOVimm/VMVNimm/VORRimm/VBICimm +def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>; +def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>; +def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>; + +// NEON integer multiply +// +// Note: these don't quite match the timing docs, but they do match +// the original A9 itinerary. +def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>; +def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>; +def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>; +def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>; +def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>; +def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>; +def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>; +def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>; + +// NEON integer shift +// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles. 
+def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
+def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
+
+// NEON permute
+def :ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
+def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
+            [IIC_VPERMQ3, IIC_VEXTQ]>;
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
+            [IIC_VTBX4]>;
+// NEON floating-point
+def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
+def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
+def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
+def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
+def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
+def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
+def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
new file mode 100644
index 0000000..e9bc3e0
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -0,0 +1,1085 @@
+//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Swift processor.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
+def SW_DIS0 : FuncUnit;
+def SW_DIS1 : FuncUnit;
+def SW_DIS2 : FuncUnit;
+
+def SW_ALU0 : FuncUnit;
+def SW_ALU1 : FuncUnit;
+def SW_LS : FuncUnit;
+def SW_IDIV : FuncUnit;
+def SW_FDIV : FuncUnit;
+
+// FIXME: Need bypasses.
+// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
+// IIC_iMOVix2ld better.
+// FIXME: Model the special immediate shifts that are not microcoded.
+// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
+// to issue on pipe 1?
+// FIXME: Model the pipelined behavior of CMP / TST instructions.
+// FIXME: Better model the microcode stages of multiply instructions, especially
+// conditional variants.
+// FIXME: Add preload instruction when it is documented.
+// FIXME: Model non-pipelined nature of FP div / sqrt unit.
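Each InstrItinData entry in the itinerary below pairs a list of InstrStage
reservations (which dispatch and execution units an instruction occupies, and
for how long) with a list of per-operand cycles, where the first cycle
corresponds to the first def operand. The sketch below shows how a client
recovers that def cycle; getOperandCycle and getStageLatency are the real
InstrItineraryData accessors, but the wrapper itself is illustrative and
assumes the itinerary data and scheduling-class index are already in hand:

#include "llvm/MC/MCInstrItineraries.h"

// Sketch: read the def cycle encoded by an entry such as
//   InstrItinData<IIC_iMUL32, [...stages...], [4, 1, 1]>
// where 4 is the cycle the result becomes available and the 1s are the
// cycles at which the source operands are read.
unsigned defOperandCycle(const llvm::InstrItineraryData &Itins,
                         unsigned ItinClass) {
  int Cycle = Itins.getOperandCycle(ItinClass, /*OperandIdx=*/0);
  // Fall back to the summed stage latency when no operand info is recorded.
  return Cycle >= 0 ? unsigned(Cycle) : Itins.getStageLatency(ItinClass);
}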
+ +def SwiftItineraries : ProcessorItineraries< + [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [ + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3]>, + InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_LS]>], + [5]>, + // + // MVN instructions + InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // Bitwise Instructions that produce a result + InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // Unary Instructions that produce a result + + // CLZ, RBIT, etc. 
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + + // BFC, BFI, UBFX, SBFX + InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1]>, + + // + // Zero and sign extension instructions + InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Test instructions + InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Move instructions, conditional + // FIXME: Correctly model the extra input dep on the destination. 
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + + // Integer multiply pipeline + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 3>, + InstrStage<1, [SW_ALU0]>], + [5, 5, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 6, 1, 1]>, + // + // Integer divide + InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 0>, + InstrStage<14, [SW_IDIV]>], + [14, 1, 1]>, + + // Integer load pipeline + // FIXME: The timings are some rough approximations + // + // Immediate offset + InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Register offset + InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + 
InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + // + // Load multiple, def is the 5th operand. + // FIXME: This assumes 3 to 4 registers. + InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + + // + // Load multiple + update, defs are the 1st and 5th operands. + InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Load multiple plus branch + InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Pop, def is the 3rd operand. + InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + // + // Pop + branch, def is the 3rd operand. + InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + + // + // iLoadi + iALUr for t2LDRpci_pic. 
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [4, 1]>, + + // Integer store pipeline + /// + // Immediate offset + InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Register offset + InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [], [], -1>, // dynamic uops + // + // Store multiple + update + InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + 
InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [2], [], -1>, // dynamic uops + + // + // Preload + InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>, + + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // Single-precision FP Unary + // + // Most floating-point moves get issued on ALU0. + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + + // + // Single to Half FP Convert + InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 4>, + InstrStage<1, [SW_ALU1]>], + [6, 1]>, + // + // Half to Single FP Convert + InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 
+ InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + + // + // Integer to Single-precision Move + InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Single-precision FP Load + InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Double-precision FP Load + InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // FP Load Multiple + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 4], [], -1>, // dynamic uops + // + // FP Load Multiple + update + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 4], [], -1>, // dynamic uops + // + // Single-precision FP Store + InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Double-precision FP Store + InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // FP Store Multiple + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1], [], -1>, // dynamic uops + // + // FP Store Multiple + update + // FIXME: Assumes a single Q register. 
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1], [], -1>, // dynamic uops + // NEON + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Count + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Pair Add Long + 
InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + + // + // Move + InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Quad-register Permute Move + InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Integer to Lane Move + // FIXME: I think this is correct, but it is not clear from the tuning guide. 
+  InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
+                               InstrStage<1, [SW_DIS1], 0>,
+                               InstrStage<1, [SW_LS], 4>,
+                               InstrStage<1, [SW_ALU0]>],
+                              [6, 1]>,
+
+  //
+  // Vector narrow move
+  InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU1]>],
+                           [2, 1]>,
+  //
+  // Double-register FP Unary
+  // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
+  // and they issue on a different pipeline.
+  InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU0]>],
+                           [2, 1]>,
+  //
+  // Quad-register FP Unary
+  // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
+  // and they issue on a different pipeline.
+  InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU0]>],
+                           [2, 1]>,
+  //
+  // Double-register FP Binary
+  // FIXME: We're using this itin for many instructions.
+  InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU0]>],
+                           [4, 1, 1]>,
+
+  //
+  // VPADD, etc.
+  InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                             InstrStage<1, [SW_ALU0]>],
+                            [4, 1, 1]>,
+  //
+  // Double-register FP VMUL
+  InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                             InstrStage<1, [SW_ALU1]>],
+                            [4, 1, 1]>,
+  //
+  // Quad-register FP Binary
+  InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU0]>],
+                           [4, 1, 1]>,
+  //
+  // Quad-register FP VMUL
+  InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                             InstrStage<1, [SW_ALU1]>],
+                            [4, 1, 1]>,
+  //
+  // Double-register FP Multiply-Accumulate
+  InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU1]>],
+                           [8, 1, 1]>,
+  //
+  // Quad-register FP Multiply-Accumulate
+  InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU1]>],
+                           [8, 1, 1]>,
+  //
+  // Double-register Fused FP Multiply-Accumulate
+  InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                             InstrStage<1, [SW_ALU1]>],
+                            [8, 1, 1]>,
+  //
+  // Quad-register Fused FP Multiply-Accumulate
+  InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                             InstrStage<1, [SW_ALU1]>],
+                            [8, 1, 1]>,
+  //
+  // Double-register Reciprocal Step
+  InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                             InstrStage<1, [SW_ALU1]>],
+                            [8, 1, 1]>,
+  //
+  // Quad-register Reciprocal Step
+  InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                             InstrStage<1, [SW_ALU1]>],
+                            [8, 1, 1]>,
+  //
+  // Double-register Permute
+  // FIXME: The latencies are unclear from the documentation.
+  InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
+                             InstrStage<1, [SW_DIS1], 0>,
+                             InstrStage<1, [SW_DIS2], 0>,
+                             InstrStage<1, [SW_ALU1], 2>,
+                             InstrStage<1, [SW_ALU1], 2>,
+                             InstrStage<1, [SW_ALU1]>],
+                            [3, 4, 3, 4]>,
+  //
+  // Quad-register Permute
+  // FIXME: The latencies are unclear from the documentation.
+  InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
+                             InstrStage<1, [SW_DIS1], 0>,
+                             InstrStage<1, [SW_DIS2], 0>,
+                             InstrStage<1, [SW_ALU1], 2>,
+                             InstrStage<1, [SW_ALU1], 2>,
+                             InstrStage<1, [SW_ALU1]>],
+                            [3, 4, 3, 4]>,
+  //
+  // Quad-register Permute (3 cycle issue on A9)
+  InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
+                              InstrStage<1, [SW_DIS1], 0>,
+                              InstrStage<1, [SW_DIS2], 0>,
+                              InstrStage<1, [SW_ALU1], 2>,
+                              InstrStage<1, [SW_ALU1], 2>,
+                              InstrStage<1, [SW_ALU1]>],
+                             [3, 4, 3, 4]>,
+
+  //
+  // Double-register VEXT
+  InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU1]>],
+                           [2, 1, 1]>,
+  //
+  // Quad-register VEXT
+  InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU1]>],
+                           [2, 1, 1]>,
+  //
+  // VTB
+  InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                           InstrStage<1, [SW_ALU1]>],
+                          [2, 1, 1]>,
+  InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
+                           InstrStage<1, [SW_DIS1], 0>,
+                           InstrStage<1, [SW_ALU1], 2>,
+                           InstrStage<1, [SW_ALU1]>],
+                          [4, 1, 3, 3]>,
+  InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
+                           InstrStage<1, [SW_DIS1], 0>,
+                           InstrStage<1, [SW_DIS2], 0>,
+                           InstrStage<1, [SW_ALU1], 2>,
+                           InstrStage<1, [SW_ALU1], 2>,
+                           InstrStage<1, [SW_ALU1]>],
+                          [6, 1, 3, 5, 5]>,
+  InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
+                           InstrStage<1, [SW_DIS1], 0>,
+                           InstrStage<1, [SW_DIS2], 0>,
+                           InstrStage<1, [SW_ALU1], 2>,
+                           InstrStage<1, [SW_ALU1], 2>,
+                           InstrStage<1, [SW_ALU1], 2>,
+                           InstrStage<1, [SW_ALU1]>],
+                          [8, 1, 3, 5, 7, 7]>,
+  //
+  // VTBX
+  InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU1]>],
+                           [2, 1, 1]>,
+  InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
+                            InstrStage<1, [SW_DIS1], 0>,
+                            InstrStage<1, [SW_ALU1], 2>,
+                            InstrStage<1, [SW_ALU1]>],
+                           [4, 1, 3, 3]>,
+  InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
+                            InstrStage<1, [SW_DIS1], 0>,
+                            InstrStage<1, [SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU1], 2>,
+                            InstrStage<1, [SW_ALU1], 2>,
+                            InstrStage<1, [SW_ALU1]>],
+                           [6, 1, 3, 5, 5]>,
+  InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
+                            InstrStage<1, [SW_DIS1], 0>,
+                            InstrStage<1, [SW_DIS2], 0>,
+                            InstrStage<1, [SW_ALU1], 2>,
+                            InstrStage<1, [SW_ALU1], 2>,
+                            InstrStage<1, [SW_ALU1], 2>,
+                            InstrStage<1, [SW_ALU1]>],
+                           [8, 1, 3, 5, 7, 7]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Swift machine model for scheduling and other instruction cost heuristics.
+def SwiftModel : SchedMachineModel {
+  let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
+  let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+  let LoadLatency = 3;
+
+  let Itineraries = SwiftItineraries;
+}
+
+// TODO: Add Swift processor and scheduler resources.
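The lets on SwiftModel surface to scheduling clients as plain fields of
MCSchedModel. The helper below is hypothetical and exists only to connect the
TableGen parameters to their C++ counterparts; the field name is the real
MCSchedModel member:

#include "llvm/MC/MCSchedule.h"

// Hypothetical helper: reading back a parameter set by SwiftModel.
// For Swift, IssueWidth == 3 and LoadLatency == 3; MinLatency == 0 lets
// dependent micro-ops be scheduled within the same dispatch group.
unsigned dispatchWidth(const llvm::MCSchedModel &SM) {
  return SM.IssueWidth;
}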
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 31d5d38..41a7e0c 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -13,8 +13,8 @@ #define DEBUG_TYPE "arm-selectiondag-info" #include "ARMTargetMachine.h" -#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DerivedTypes.h" using namespace llvm; ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) @@ -155,7 +155,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, TargetLowering::ArgListEntry Entry; // First argument: data pointer - Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 89e29ad..058d4c4 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #include "ARMSubtarget.h" +#include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" -#include "llvm/GlobalValue.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -31,6 +32,10 @@ static cl::opt<bool> DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden); static cl::opt<bool> +UseFusedMulOps("arm-use-mulops", + cl::init(true), cl::Hidden); + +static cl::opt<bool> StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); @@ -49,6 +54,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasVFPv4(false) , HasNEON(false) , UseNEONForSinglePrecisionFP(false) + , UseMulOps(UseFusedMulOps) , SlowFPVMLx(false) , HasVMLxForwarding(false) , SlowFPBrcc(false) @@ -63,10 +69,12 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasFP16(false) , HasD16(false) , HasHardwareDivide(false) + , HasHardwareDivideInARM(false) , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) , AvoidCPSRPartialUpdate(false) + , AvoidMOVsShifterOperand(false) , HasRAS(false) , HasMPExtension(false) , FPOnlySP(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index b394061..64878cd 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -15,9 +15,9 @@ #define ARMSUBTARGET_H #include "MCTargetDesc/ARMMCTargetDesc.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <string> #define GET_SUBTARGETINFO_HEADER @@ -30,7 +30,7 @@ class StringRef; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA8, CortexA9 + Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. @@ -57,6 +57,10 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; + /// UseMulOps - True if non-microcoded fused integer multiply-add and + /// multiply-subtract instructions should be used. 
+ bool UseMulOps; + /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates /// whether the FP VML[AS] instructions are slow (if so, don't use them). bool SlowFPVMLx; @@ -107,6 +111,9 @@ protected: /// HasHardwareDivide - True if subtarget supports [su]div bool HasHardwareDivide; + /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode + bool HasHardwareDivideInARM; + /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack /// instructions. bool HasT2ExtractPack; @@ -124,6 +131,10 @@ protected: /// CPSR setting instruction. bool AvoidCPSRPartialUpdate; + /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting + /// movs with shifter operand (i.e. asr, lsl, lsr). + bool AvoidMOVsShifterOperand; + /// HasRAS - Some processors perform return stack prediction. CodeGen should /// avoid issue "normal" call instructions to callees which do not return. bool HasRAS; @@ -197,9 +208,14 @@ protected: bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } + bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } + bool isCortexA15() const { return ARMProcFamily == CortexA15; } + bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return CPUString == "cortex-m3"; } + bool isLikeA9() const { return isCortexA9() || isCortexA15(); } + bool isCortexR5() const { return ARMProcFamily == CortexR5; } bool hasARMOps() const { return !NoARM; } @@ -211,14 +227,17 @@ protected: return hasNEON() && UseNEONForSinglePrecisionFP; } bool hasDivide() const { return HasHardwareDivide; } + bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } + bool useMulOps() const { return UseMulOps; } bool useFPVMLx() const { return !SlowFPVMLx; } bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } @@ -231,7 +250,7 @@ protected: bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetNaCl() const { - return TargetTriple.getOS() == Triple::NativeClient; + return TargetTriple.getOS() == Triple::NaCl; } bool isTargetELF() const { return !isTargetDarwin(); } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 171c9ad..7745218 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -11,11 +11,11 @@ //===----------------------------------------------------------------------===// #include "ARMTargetMachine.h" -#include "ARMFrameLowering.h" #include "ARM.h" -#include "llvm/PassManager.h" +#include "ARMFrameLowering.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" @@ -51,6 
+51,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, this->Options.FloatABIType = FloatABI::Soft; } +void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our ARM pass. This + // allows the ARM pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createARMTargetTransformInfoPass(this)); +} + + void ARMTargetMachine::anchor() { } ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, @@ -60,7 +69,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), InstrInfo(Subtarget), - DataLayout(Subtarget.isAPCS_ABI() ? + DL(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "v128:32:128-v64:32:64-n32-S32") : Subtarget.isAAPCS_ABI() ? @@ -68,7 +77,6 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, "v128:64:128-v64:64:64-n32-S64") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "v128:64:128-v64:64:64-n32-S32")), - ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { @@ -88,7 +96,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), - DataLayout(Subtarget.isAPCS_ABI() ? + DL(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "i16:16:32-i8:8:32-i1:8:32-" "v128:32:128-v64:32:64-a:0:32-n32-S32") : @@ -99,7 +107,6 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-" "v128:64:128-v64:64:64-a:0:32-n32-S32")), - ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget.hasThumb2() @@ -143,6 +150,11 @@ bool ARMPassConfig::addPreISel() { bool ARMPassConfig::addInstSelector() { addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); + + const ARMSubtarget *Subtarget = &getARMSubtarget(); + if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() && + TM->Options.EnableFastISel) + addPass(createARMGlobalBaseRegPass()); return false; } @@ -150,7 +162,7 @@ bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. 
if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) addPass(createARMLoadStoreOptimizationPass(true)); - if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) addPass(createMLxExpansionPass()); return true; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index abcdb24..be6bec7 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -14,20 +14,19 @@ #ifndef ARMTARGETMACHINE_H #define ARMTARGETMACHINE_H -#include "ARMInstrInfo.h" -#include "ARMELFWriterInfo.h" #include "ARMFrameLowering.h" -#include "ARMJITInfo.h" -#include "ARMSubtarget.h" #include "ARMISelLowering.h" +#include "ARMInstrInfo.h" +#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" -#include "Thumb1InstrInfo.h" +#include "ARMSubtarget.h" #include "Thumb1FrameLowering.h" +#include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { @@ -51,6 +50,9 @@ public: return &InstrItins; } + /// \brief Register ARM analysis passes with a pass manager. + virtual void addAnalysisPasses(PassManagerBase &PM); + // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); @@ -62,8 +64,7 @@ public: class ARMTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); ARMInstrInfo InstrInfo; - const TargetData DataLayout; // Calculates type size & alignment - ARMELFWriterInfo ELFWriterInfo; + const DataLayout DL; // Calculates type size & alignment ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; @@ -88,12 +89,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine { virtual const ARMFrameLowering *getFrameLowering() const { return &FrameLowering; } - virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetData *getTargetData() const { return &DataLayout; } - virtual const ARMELFWriterInfo *getELFWriterInfo() const { - return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; - } + virtual const DataLayout *getDataLayout() const { return &DL; } }; /// ThumbTargetMachine - Thumb target machine. @@ -104,8 +101,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); // Either Thumb1InstrInfo or Thumb2InstrInfo. OwningPtr<ARMBaseInstrInfo> InstrInfo; - const TargetData DataLayout; // Calculates type size & alignment - ARMELFWriterInfo ELFWriterInfo; + const DataLayout DL; // Calculates type size & alignment ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. @@ -138,10 +134,7 @@ public: virtual const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } - virtual const TargetData *getTargetData() const { return &DataLayout; } - virtual const ARMELFWriterInfo *getELFWriterInfo() const { - return Subtarget.isTargetELF() ? 
                                      &ELFWriterInfo : 0;
-  }
+  virtual const DataLayout *getDataLayout() const { return &DL; }
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 3d85ca7..dfdf6ab 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -9,12 +9,14 @@
 
 #include "ARMTargetObjectFile.h"
 #include "ARMSubtarget.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ELF.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
 
 using namespace llvm;
 using namespace dwarf;
@@ -38,3 +40,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
                                0,
                                SectionKind::getMetadata());
 }
+
+const MCExpr *ARMElfTargetObjectFile::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                        MachineModuleInfo *MMI, unsigned Encoding,
+                        MCStreamer &Streamer) const {
+  assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
+
+  return MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+                                 MCSymbolRefExpr::VK_ARM_TARGET2,
+                                 getContext());
+}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c6a7261..7f60727 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -28,6 +28,11 @@ public:
 
   virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
 
+  const MCExpr *
+  getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                          MachineModuleInfo *MMI, unsigned Encoding,
+                          MCStreamer &Streamer) const;
+
   virtual const MCSection *getAttributesSection() const {
     return AttributesSection;
   }
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
new file mode 100644
index 0000000..03a23be
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -0,0 +1,124 @@
+//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// ARM target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armtti"
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeARMTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class ARMTTI : public ImmutablePass, public TargetTransformInfo {
+  const ARMBaseTargetMachine *TM;
+  const ARMSubtarget *ST;
+
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + ARMTTI() : ImmutablePass(ID), TM(0), ST(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + ARMTTI(const ARMBaseTargetMachine *TM) + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()) { + initializeARMTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + + virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; + + /// @} +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti", + "ARM Target Transform Info", true, true, false) +char ARMTTI::ID = 0; + +ImmutablePass * +llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) { + return new ARMTTI(TM); +} + + +unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned Bits = Ty->getPrimitiveSizeInBits(); + if (Bits == 0 || Bits > 32) + return 4; + + int32_t SImmVal = Imm.getSExtValue(); + uint32_t ZImmVal = Imm.getZExtValue(); + if (!ST->isThumb()) { + if ((SImmVal >= 0 && SImmVal < 65536) || + (ARM_AM::getSOImmVal(ZImmVal) != -1) || + (ARM_AM::getSOImmVal(~ZImmVal) != -1)) + return 1; + return ST->hasV6T2Ops() ? 2 : 3; + } else if (ST->isThumb2()) { + if ((SImmVal >= 0 && SImmVal < 65536) || + (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || + (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) + return 1; + return ST->hasV6T2Ops() ? 2 : 3; + } else /*Thumb1*/ { + if (SImmVal >= 0 && SImmVal < 256) + return 1; + if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) + return 2; + // Load from constantpool. + return 3; + } + return 2; +} diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp deleted file mode 100644 index fda8536..0000000 --- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
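The immediate costs above hinge on ARM's so_imm encoding: a data-processing immediate must be an 8-bit value rotated right by an even amount, which is what the ARM_AM::getSOImmVal calls test (on the value and on its complement). A minimal standalone sketch of that test; isARMSOImmEncodable is a hypothetical illustration, not part of this patch:

#include <cstdint>

// Illustrative only: returns true iff V is representable as an ARM
// so_imm, i.e. an 8-bit value rotated right by an even amount. The real
// getSOImmVal also reports which rotation was chosen, or -1 on failure.
static bool isARMSOImmEncodable(uint32_t V) {
  for (unsigned Rot = 0; Rot != 32; Rot += 2) {
    // Rotating V left by Rot undoes a right-rotation by Rot.
    uint32_t Undone = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
    if ((Undone & ~0xFFu) == 0)
      return true;
  }
  return false;
}

Values that fail this test (and the same test on the complemented value) fall back to the multi-instruction or constant-pool costs returned above.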
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARMBaseInfo.h" - -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCTargetAsmLexer.h" - -#include "llvm/Support/TargetRegistry.h" - -#include "llvm/ADT/StringSwitch.h" - -#include <string> -#include <map> - -using namespace llvm; - -namespace { - -class ARMBaseAsmLexer : public MCTargetAsmLexer { - const MCAsmInfo &AsmInfo; - - const AsmToken &lexDefinite() { - return getLexer()->Lex(); - } - - AsmToken LexTokenUAL(); -protected: - typedef std::map <std::string, unsigned> rmap_ty; - - rmap_ty RegisterMap; - - void InitRegisterMap(const MCRegisterInfo *info) { - unsigned numRegs = info->getNumRegs(); - - for (unsigned i = 0; i < numRegs; ++i) { - const char *regName = info->getName(i); - if (regName) - RegisterMap[regName] = i; - } - } - - unsigned MatchRegisterName(StringRef Name) { - rmap_ty::iterator iter = RegisterMap.find(Name.str()); - if (iter != RegisterMap.end()) - return iter->second; - else - return 0; - } - - AsmToken LexToken() { - if (!Lexer) { - SetError(SMLoc(), "No MCAsmLexer installed"); - return AsmToken(AsmToken::Error, "", 0); - } - - switch (AsmInfo.getAssemblerDialect()) { - default: - SetError(SMLoc(), "Unhandled dialect"); - return AsmToken(AsmToken::Error, "", 0); - case 0: - return LexTokenUAL(); - } - } -public: - ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) - : MCTargetAsmLexer(T), AsmInfo(MAI) { - } -}; - -class ARMAsmLexer : public ARMBaseAsmLexer { -public: - ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI) - : ARMBaseAsmLexer(T, MAI) { - InitRegisterMap(&MRI); - } -}; - -class ThumbAsmLexer : public ARMBaseAsmLexer { -public: - ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI) - : ARMBaseAsmLexer(T, MAI) { - InitRegisterMap(&MRI); - } -}; - -} // end anonymous namespace - -AsmToken ARMBaseAsmLexer::LexTokenUAL() { - const AsmToken &lexedToken = lexDefinite(); - - switch (lexedToken.getKind()) { - default: break; - case AsmToken::Error: - SetError(Lexer->getErrLoc(), Lexer->getErr()); - break; - case AsmToken::Identifier: { - std::string lowerCase = lexedToken.getString().lower(); - - unsigned regID = MatchRegisterName(lowerCase); - // Check for register aliases. - // r13 -> sp - // r14 -> lr - // r15 -> pc - // ip -> r12 - // FIXME: Some assemblers support lots of others. Do we want them all? 
- if (!regID) { - regID = StringSwitch<unsigned>(lowerCase) - .Case("r13", ARM::SP) - .Case("r14", ARM::LR) - .Case("r15", ARM::PC) - .Case("ip", ARM::R12) - .Default(0); - } - - if (regID) - return AsmToken(AsmToken::Register, - lexedToken.getString(), - static_cast<int64_t>(regID)); - } - } - - return AsmToken(lexedToken); -} - -extern "C" void LLVMInitializeARMAsmLexer() { - RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget); - RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); -} diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e1e2f6e..ad37a21 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -7,31 +7,31 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" using namespace llvm; @@ -178,7 +178,8 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); + OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, + SMLoc &EndLoc); // Asm Match Converter Methods void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); @@ -253,21 +254,17 @@ public: // Implementation of the MCTargetAsmParser interface: bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - bool ParseInstruction(StringRef Name, SMLoc NameLoc, + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); unsigned checkTargetMatchPredicate(MCInst &Inst); - bool MatchAndEmitInstruction(SMLoc IDLoc, + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); - - unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - return 
getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, NumMCOperands); - } + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); }; } // end anonymous namespace @@ -487,7 +484,8 @@ public: SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - + /// getLocRange - Get the range between the first and last token of this + /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } ARMCC::CondCodes getCondCode() const { @@ -863,7 +861,7 @@ public: bool isSPRRegList() const { return Kind == k_SPRRegisterList; } bool isToken() const { return Kind == k_Token; } bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; } - bool isMemory() const { return Kind == k_Memory; } + bool isMem() const { return Kind == k_Memory; } bool isShifterImm() const { return Kind == k_ShifterImmediate; } bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; } @@ -874,14 +872,14 @@ public: return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift; } bool isMemNoOffset(bool alignOK = false) const { - if (!isMemory()) + if (!isMem()) return false; // No offset of any kind. return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && (alignOK || Memory.Alignment == 0); } bool isMemPCRelImm12() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Base register must be PC. if (Memory.BaseRegNum != ARM::PC) @@ -895,7 +893,7 @@ public: return isMemNoOffset(true); } bool isAddrMode2() const { - if (!isMemory() || Memory.Alignment != 0) return false; + if (!isMem() || Memory.Alignment != 0) return false; // Check for register offset. if (Memory.OffsetRegNum) return true; // Immediate offset in range [-4095, 4095]. @@ -917,7 +915,7 @@ public: // and we reject it. if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - if (!isMemory() || Memory.Alignment != 0) return false; + if (!isMem() || Memory.Alignment != 0) return false; // No shifts are legal for AM3. if (Memory.ShiftType != ARM_AM::no_shift) return false; // Check for register offset. @@ -947,7 +945,7 @@ public: // and we reject it. if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - if (!isMemory() || Memory.Alignment != 0) return false; + if (!isMem() || Memory.Alignment != 0) return false; // Check for register offset. if (Memory.OffsetRegNum) return false; // Immediate offset in range [-1020, 1020] and a multiple of 4. 
@@ -957,25 +955,25 @@ public: Val == INT32_MIN; } bool isMemTBB() const { - if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) return false; return true; } bool isMemTBH() const { - if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 || Memory.Alignment != 0 ) return false; return true; } bool isMemRegOffset() const { - if (!isMemory() || !Memory.OffsetRegNum || Memory.Alignment != 0) + if (!isMem() || !Memory.OffsetRegNum || Memory.Alignment != 0) return false; return true; } bool isT2MemRegOffset() const { - if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.Alignment != 0) return false; // Only lsl #{0, 1, 2, 3} allowed. @@ -988,14 +986,14 @@ public: bool isMemThumbRR() const { // Thumb reg+reg addressing is simple. Just two registers, a base and // an offset. No shifts, negations or any other complicating factors. - if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) return false; return isARMLowRegister(Memory.BaseRegNum) && (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum)); } bool isMemThumbRIs4() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || + if (!isMem() || Memory.OffsetRegNum != 0 || !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) return false; // Immediate offset, multiple of 4 in range [0, 124]. @@ -1004,7 +1002,7 @@ public: return Val >= 0 && Val <= 124 && (Val % 4) == 0; } bool isMemThumbRIs2() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || + if (!isMem() || Memory.OffsetRegNum != 0 || !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) return false; // Immediate offset, multiple of 4 in range [0, 62]. @@ -1013,7 +1011,7 @@ public: return Val >= 0 && Val <= 62 && (Val % 2) == 0; } bool isMemThumbRIs1() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || + if (!isMem() || Memory.OffsetRegNum != 0 || !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) return false; // Immediate offset in range [0, 31]. @@ -1022,7 +1020,7 @@ public: return Val >= 0 && Val <= 31; } bool isMemThumbSPI() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0) return false; // Immediate offset, multiple of 4 in range [0, 1020]. @@ -1036,7 +1034,7 @@ public: // and we reject it. if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset a multiple of 4 in range [-1020, 1020]. if (!Memory.OffsetImm) return true; @@ -1045,7 +1043,7 @@ public: return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN; } bool isMemImm0_1020s4Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset a multiple of 4 in range [0, 1020]. 
if (!Memory.OffsetImm) return true; @@ -1053,7 +1051,7 @@ public: return Val >= 0 && Val <= 1020 && (Val & 3) == 0; } bool isMemImm8Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Base reg of PC isn't allowed for these encodings. if (Memory.BaseRegNum == ARM::PC) return false; @@ -1063,7 +1061,7 @@ public: return (Val == INT32_MIN) || (Val > -256 && Val < 256); } bool isMemPosImm8Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [0, 255]. if (!Memory.OffsetImm) return true; @@ -1071,7 +1069,7 @@ public: return Val >= 0 && Val < 256; } bool isMemNegImm8Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Base reg of PC isn't allowed for these encodings. if (Memory.BaseRegNum == ARM::PC) return false; @@ -1081,7 +1079,7 @@ public: return (Val == INT32_MIN) || (Val > -256 && Val < 0); } bool isMemUImm12Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [0, 4095]. if (!Memory.OffsetImm) return true; @@ -1095,7 +1093,7 @@ public: if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [-4095, 4095]. if (!Memory.OffsetImm) return true; @@ -2453,8 +2451,8 @@ static unsigned MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { StartLoc = Parser.getTok().getLoc(); + EndLoc = Parser.getTok().getEndLoc(); RegNo = tryParseRegister(); - EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } @@ -2543,6 +2541,8 @@ int ARMAsmParser::tryParseShiftRegister( if (!PrevOp->isReg()) return Error(PrevOp->getStartLoc(), "shift must be of a register"); int SrcReg = PrevOp->getReg(); + + SMLoc EndLoc; int64_t Imm = 0; int ShiftReg = 0; if (ShiftTy == ARM_AM::rrx) { @@ -2557,7 +2557,7 @@ int ARMAsmParser::tryParseShiftRegister( Parser.Lex(); // Eat hash. 
SMLoc ImmLoc = Parser.getTok().getLoc(); const MCExpr *ShiftExpr = 0; - if (getParser().ParseExpression(ShiftExpr)) { + if (getParser().ParseExpression(ShiftExpr, EndLoc)) { Error(ImmLoc, "invalid immediate shift value"); return -1; } @@ -2582,8 +2582,9 @@ int ARMAsmParser::tryParseShiftRegister( if (Imm == 0) ShiftTy = ARM_AM::lsl; } else if (Parser.getTok().is(AsmToken::Identifier)) { - ShiftReg = tryParseRegister(); SMLoc L = Parser.getTok().getLoc(); + EndLoc = Parser.getTok().getEndLoc(); + ShiftReg = tryParseRegister(); if (ShiftReg == -1) { Error (L, "expected immediate or register in shift operand"); return -1; @@ -2598,10 +2599,10 @@ int ARMAsmParser::tryParseShiftRegister( if (ShiftReg && ShiftTy != ARM_AM::rrx) Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg, ShiftReg, Imm, - S, Parser.getTok().getLoc())); + S, EndLoc)); else Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm, - S, Parser.getTok().getLoc())); + S, EndLoc)); return 0; } @@ -2615,12 +2616,13 @@ int ARMAsmParser::tryParseShiftRegister( /// parse for a specific register type. bool ARMAsmParser:: tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - SMLoc S = Parser.getTok().getLoc(); + const AsmToken &RegTok = Parser.getTok(); int RegNo = tryParseRegister(); if (RegNo == -1) return true; - Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc())); + Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(), + RegTok.getEndLoc())); const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { @@ -2644,10 +2646,10 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!MCE) return TokError("immediate value expected for vector index"); - SMLoc E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); + return Error(Parser.getTok().getLoc(), "']' expected"); + SMLoc E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), @@ -2797,7 +2799,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Check for and consume the closing '}' if (Parser.getTok().isNot(AsmToken::RCurly)) return MatchOperand_ParseFail; - SMLoc E = Parser.getTok().getLoc(); + SMLoc E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the '}' Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E)); @@ -2894,10 +2896,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.getTok().is(AsmToken::Minus)) { if (Parser.getTok().is(AsmToken::Minus)) { Parser.Lex(); // Eat the minus. - SMLoc EndLoc = Parser.getTok().getLoc(); + SMLoc AfterMinusLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); if (EndReg == -1) - return Error(EndLoc, "register expected"); + return Error(AfterMinusLoc, "register expected"); // Allow Q regs and just interpret them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) EndReg = getDRegFromQReg(EndReg) + 1; @@ -2907,10 +2909,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { continue; // The register must be in the same register class as the first. if (!RC->contains(EndReg)) - return Error(EndLoc, "invalid register in register list"); + return Error(AfterMinusLoc, "invalid register in register list"); // Ranges must go from low to high. 
      if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
-        return Error(EndLoc, "bad range in register list");
+        return Error(AfterMinusLoc, "bad range in register list");

       // Add all the registers in the range to the register list.
       while (Reg != EndReg) {
@@ -2958,9 +2960,9 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
     Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
   }

-  SMLoc E = Parser.getTok().getLoc();
   if (Parser.getTok().isNot(AsmToken::RCurly))
-    return Error(E, "'}' expected");
+    return Error(Parser.getTok().getLoc(), "'}' expected");
+  SMLoc E = Parser.getTok().getEndLoc();
   Parser.Lex(); // Eat '}' token.

   // Push the register list operand.
@@ -2977,13 +2979,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {

 // Helper function to parse the lane index for vector lists.
 ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
   Index = 0; // Always return a defined index value.
   if (Parser.getTok().is(AsmToken::LBrac)) {
     Parser.Lex(); // Eat the '['.
     if (Parser.getTok().is(AsmToken::RBrac)) {
       // "Dn[]" is the 'all lanes' syntax.
       LaneKind = AllLanes;
+      EndLoc = Parser.getTok().getEndLoc();
       Parser.Lex(); // Eat the ']'.
       return MatchOperand_Success;
     }
@@ -3008,6 +3011,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
       Error(Parser.getTok().getLoc(), "']' expected");
       return MatchOperand_ParseFail;
     }
+    EndLoc = Parser.getTok().getEndLoc();
     Parser.Lex(); // Eat the ']'.

     int64_t Val = CE->getValue();
@@ -3034,21 +3038,19 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // (without enclosing curly braces) as a single or double entry list,
   // respectively.
if (Parser.getTok().is(AsmToken::Identifier)) { + SMLoc E = Parser.getTok().getEndLoc(); int Reg = tryParseRegister(); if (Reg == -1) return MatchOperand_NoMatch; - SMLoc E = Parser.getTok().getLoc(); if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { - OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E); if (Res != MatchOperand_Success) return Res; switch (LaneKind) { case NoLanes: - E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E)); break; case AllLanes: - E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false, S, E)); break; @@ -3062,18 +3064,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); - OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E); if (Res != MatchOperand_Success) return Res; switch (LaneKind) { case NoLanes: - E = Parser.getTok().getLoc(); Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, &ARMMCRegisterClasses[ARM::DPairRegClassID]); Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); break; case AllLanes: - E = Parser.getTok().getLoc(); Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, &ARMMCRegisterClasses[ARM::DPairRegClassID]); Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false, @@ -3114,7 +3114,9 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ++Reg; ++Count; } - if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success) + + SMLoc E; + if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success) return MatchOperand_ParseFail; while (Parser.getTok().is(AsmToken::Comma) || @@ -3128,10 +3130,10 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat the minus. - SMLoc EndLoc = Parser.getTok().getLoc(); + SMLoc AfterMinusLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); if (EndReg == -1) { - Error(EndLoc, "register expected"); + Error(AfterMinusLoc, "register expected"); return MatchOperand_ParseFail; } // Allow Q regs and just interpret them as the two D sub-registers. @@ -3143,24 +3145,24 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { continue; // The register must be in the same register class as the first. if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) { - Error(EndLoc, "invalid register in register list"); + Error(AfterMinusLoc, "invalid register in register list"); return MatchOperand_ParseFail; } // Ranges must go from low to high. if (Reg > EndReg) { - Error(EndLoc, "bad range in register list"); + Error(AfterMinusLoc, "bad range in register list"); return MatchOperand_ParseFail; } // Parse the lane specifier if present. VectorLaneTy NextLaneKind; unsigned NextLaneIndex; - if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != + MatchOperand_Success) return MatchOperand_ParseFail; if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { - Error(EndLoc, "mismatched lane index in register list"); + Error(AfterMinusLoc, "mismatched lane index in register list"); return MatchOperand_ParseFail; } - EndLoc = Parser.getTok().getLoc(); // Add all the registers in the range to the register list. 
Count += EndReg - Reg; @@ -3199,11 +3201,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Parse the lane specifier if present. VectorLaneTy NextLaneKind; unsigned NextLaneIndex; - SMLoc EndLoc = Parser.getTok().getLoc(); - if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + SMLoc LaneLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != + MatchOperand_Success) return MatchOperand_ParseFail; if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { - Error(EndLoc, "mismatched lane index in register list"); + Error(LaneLoc, "mismatched lane index in register list"); return MatchOperand_ParseFail; } continue; @@ -3224,7 +3227,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { VectorLaneTy NextLaneKind; unsigned NextLaneIndex; SMLoc EndLoc = Parser.getTok().getLoc(); - if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success) return MatchOperand_ParseFail; if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { Error(EndLoc, "mismatched lane index in register list"); @@ -3232,11 +3235,11 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } } - SMLoc E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RCurly)) { - Error(E, "'}' expected"); + Error(Parser.getTok().getLoc(), "'}' expected"); return MatchOperand_ParseFail; } + E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat '}' token. switch (LaneKind) { @@ -3377,7 +3380,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; StringRef Mask = Tok.getString(); if (isMClass()) { @@ -3527,7 +3531,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, const MCExpr *ShiftAmount; SMLoc Loc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(ShiftAmount)) { + SMLoc EndLoc; + if (getParser().ParseExpression(ShiftAmount, EndLoc)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3542,7 +3547,7 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, return MatchOperand_ParseFail; } - Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc())); + Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc)); return MatchOperand_Success; } @@ -3552,7 +3557,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { - Error(Tok.getLoc(), "'be' or 'le' operand expected"); + Error(S, "'be' or 'le' operand expected"); return MatchOperand_ParseFail; } int Val = StringSwitch<int>(Tok.getString()) @@ -3562,12 +3567,12 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the token. 
  if (Val == -1) {
-    Error(Tok.getLoc(), "'be' or 'le' operand expected");
+    Error(S, "'be' or 'le' operand expected");
    return MatchOperand_ParseFail;
  }
  Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
                                                   getContext()),
-                                           S, Parser.getTok().getLoc()));
+                                           S, Tok.getEndLoc()));
  return MatchOperand_Success;
}

@@ -3603,16 +3608,17 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
     return MatchOperand_ParseFail;
   }
   Parser.Lex(); // Eat hash token.
+  SMLoc ExLoc = Parser.getTok().getLoc();

   const MCExpr *ShiftAmount;
-  SMLoc E = Parser.getTok().getLoc();
-  if (getParser().ParseExpression(ShiftAmount)) {
-    Error(E, "malformed shift expression");
+  SMLoc EndLoc;
+  if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
+    Error(ExLoc, "malformed shift expression");
     return MatchOperand_ParseFail;
   }
   const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
   if (!CE) {
-    Error(E, "shift amount must be an immediate");
+    Error(ExLoc, "shift amount must be an immediate");
     return MatchOperand_ParseFail;
   }

@@ -3620,25 +3626,24 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   if (isASR) {
     // Shift amount must be in [1,32]
     if (Val < 1 || Val > 32) {
-      Error(E, "'asr' shift amount must be in range [1,32]");
+      Error(ExLoc, "'asr' shift amount must be in range [1,32]");
       return MatchOperand_ParseFail;
     }
     // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
     if (isThumb() && Val == 32) {
-      Error(E, "'asr #32' shift amount not allowed in Thumb mode");
+      Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode");
       return MatchOperand_ParseFail;
     }
     if (Val == 32) Val = 0;
   } else {
     // Shift amount must be in [0,31]
     if (Val < 0 || Val > 31) {
-      Error(E, "'lsr' shift amount must be in range [0,31]");
+      Error(ExLoc, "'lsr' shift amount must be in range [0,31]");
       return MatchOperand_ParseFail;
     }
   }

-  E = Parser.getTok().getLoc();
-  Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E));
+  Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc));

   return MatchOperand_Success;
 }

@@ -3664,16 +3669,17 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
     return MatchOperand_ParseFail;
   }
   Parser.Lex(); // Eat hash token.
+  SMLoc ExLoc = Parser.getTok().getLoc();

   const MCExpr *ShiftAmount;
-  SMLoc E = Parser.getTok().getLoc();
-  if (getParser().ParseExpression(ShiftAmount)) {
-    Error(E, "malformed rotate expression");
+  SMLoc EndLoc;
+  if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
+    Error(ExLoc, "malformed rotate expression");
     return MatchOperand_ParseFail;
   }
   const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
   if (!CE) {
-    Error(E, "rotate amount must be an immediate");
+    Error(ExLoc, "rotate amount must be an immediate");
     return MatchOperand_ParseFail;
   }

@@ -3682,12 +3688,11 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // normally, zero is represented in asm by omitting the rotate operand
   // entirely.
   if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
-    Error(E, "'ror' rotate amount must be 8, 16, or 24");
+    Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24");
     return MatchOperand_ParseFail;
   }

-  E = Parser.getTok().getLoc();
-  Operands.push_back(ARMOperand::CreateRotImm(Val, S, E));
+  Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc));

   return MatchOperand_Success;
 }

@@ -3737,7 +3742,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   Parser.Lex(); // Eat hash token.
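The asr handling above follows the SO-register convention that shift amounts are stored as five bits, with 32 represented by 0; translateShiftImm in the instruction printer applies the inverse mapping. A hedged sketch of the two directions, using hypothetical helper names not present in the patch:

#include <cassert>

// Illustrative only: "asr #32" is legal ARM syntax but is stored as an
// encoded amount of 0; decoding maps 0 back to 32.
static unsigned encodeASRAmount(unsigned Val) {
  assert(Val >= 1 && Val <= 32 && "asr amount must be in [1,32]");
  return Val == 32 ? 0 : Val;
}

static unsigned decodeASRAmount(unsigned Imm) {
  assert(Imm < 32 && "encoded amount is five bits");
  return Imm == 0 ? 32 : Imm;
}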
const MCExpr *WidthExpr; - if (getParser().ParseExpression(WidthExpr)) { + SMLoc EndLoc; + if (getParser().ParseExpression(WidthExpr, EndLoc)) { Error(E, "malformed immediate expression"); return MatchOperand_ParseFail; } @@ -3753,9 +3759,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(E, "'width' operand must be in the range [1,32-lsb]"); return MatchOperand_ParseFail; } - E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E)); + Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc)); return MatchOperand_Success; } @@ -3774,7 +3779,6 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Tok.getLoc(); bool haveEaten = false; bool isAdd = true; - int Reg = -1; if (Tok.is(AsmToken::Plus)) { Parser.Lex(); // Eat the '+' token. haveEaten = true; @@ -3783,15 +3787,15 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { isAdd = false; haveEaten = true; } - if (Parser.getTok().is(AsmToken::Identifier)) - Reg = tryParseRegister(); + + SMLoc E = Parser.getTok().getEndLoc(); + int Reg = tryParseRegister(); if (Reg == -1) { if (!haveEaten) return MatchOperand_NoMatch; Error(Parser.getTok().getLoc(), "register expected"); return MatchOperand_ParseFail; } - SMLoc E = Parser.getTok().getLoc(); ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift; unsigned ShiftImm = 0; @@ -3799,6 +3803,9 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the ','. if (parseMemRegOffsetShift(ShiftTy, ShiftImm)) return MatchOperand_ParseFail; + + // FIXME: Only approximates end...may include intervening whitespace. + E = Parser.getTok().getLoc(); } Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy, @@ -3831,14 +3838,14 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // differently. bool isNegative = Parser.getTok().is(AsmToken::Minus); const MCExpr *Offset; - if (getParser().ParseExpression(Offset)) + SMLoc E; + if (getParser().ParseExpression(Offset, E)) return MatchOperand_ParseFail; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset); if (!CE) { Error(S, "constant expression expected"); return MatchOperand_ParseFail; } - SMLoc E = Tok.getLoc(); // Negative zero is encoded as the flag value INT32_MIN. int32_t Val = CE->getValue(); if (isNegative && Val == 0) @@ -3853,7 +3860,6 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { bool haveEaten = false; bool isAdd = true; - int Reg = -1; if (Tok.is(AsmToken::Plus)) { Parser.Lex(); // Eat the '+' token. haveEaten = true; @@ -3862,18 +3868,18 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { isAdd = false; haveEaten = true; } - if (Parser.getTok().is(AsmToken::Identifier)) - Reg = tryParseRegister(); + + Tok = Parser.getTok(); + int Reg = tryParseRegister(); if (Reg == -1) { if (!haveEaten) return MatchOperand_NoMatch; - Error(Parser.getTok().getLoc(), "register expected"); + Error(Tok.getLoc(), "register expected"); return MatchOperand_ParseFail; } - SMLoc E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift, - 0, S, E)); + 0, S, Tok.getEndLoc())); return MatchOperand_Success; } @@ -4226,7 +4232,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return Error(Tok.getLoc(), "malformed memory operand"); if (Tok.is(AsmToken::RBrac)) { - E = Tok.getLoc(); + E = Tok.getEndLoc(); Parser.Lex(); // Eat right bracket token. 
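parseAM3Offset above peeks at the '-' before parsing the expression because "-0" evaluates to plain 0; the sign is preserved by substituting the INT32_MIN flag value. Condensed as a hypothetical helper, not part of the patch:

#include <cstdint>

// Illustrative only: negative zero has no distinct two's-complement
// encoding, so the parser records #-0 with the sentinel INT32_MIN,
// which downstream code treats as "subtract, offset 0".
static int32_t encodeImmOffset(bool SawMinus, int32_t Val) {
  return (SawMinus && Val == 0) ? INT32_MIN : Val;
}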
 Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
@@ -4274,9 +4280,9 @@
     }

     // Now we should have the closing ']'
-    E = Parser.getTok().getLoc();
     if (Parser.getTok().isNot(AsmToken::RBrac))
-      return Error(E, "']' expected");
+      return Error(Parser.getTok().getLoc(), "']' expected");
+    E = Parser.getTok().getEndLoc();
     Parser.Lex(); // Eat right bracket token.

     // Don't worry about range checking the value here. That's handled by
@@ -4323,9 +4329,9 @@
       CE = MCConstantExpr::Create(INT32_MIN, getContext());

     // Now we should have the closing ']'
-    E = Parser.getTok().getLoc();
     if (Parser.getTok().isNot(AsmToken::RBrac))
-      return Error(E, "']' expected");
+      return Error(Parser.getTok().getLoc(), "']' expected");
+    E = Parser.getTok().getEndLoc();
     Parser.Lex(); // Eat right bracket token.

     // Don't worry about range checking the value here. That's handled by
@@ -4369,9 +4375,9 @@
   }

   // Now we should have the closing ']'
-  E = Parser.getTok().getLoc();
   if (Parser.getTok().isNot(AsmToken::RBrac))
-    return Error(E, "']' expected");
+    return Error(Parser.getTok().getLoc(), "']' expected");
+  E = Parser.getTok().getEndLoc();
   Parser.Lex(); // Eat right bracket token.

   Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
@@ -4439,6 +4445,12 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
         ((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) ||
         ((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32))
       return Error(Loc, "immediate shift value out of range");
+    // If <ShiftTy> #0, turn it into a no_shift.
+    if (Imm == 0)
+      St = ARM_AM::lsl;
+    // For consistency, treat lsr #32 and asr #32 as having immediate value 0.
+    if (Imm == 32)
+      Imm = 0;
     Amount = Imm;
   }

@@ -4616,7 +4628,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
       return true;

     const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
-                                                  getContext());
+                                              getContext());
     E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
     Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
     return false;
@@ -4951,7 +4963,8 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
 static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);

 /// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                                    SMLoc NameLoc,
                SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // Apply mnemonic aliases before doing anything else, as the destination
   // mnemonic may include suffixes and we want to handle them normally.
@@ -5182,6 +5195,45 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
     }
   }

+  // Adjust operands of ldrexd/strexd to MCK_GPRPair.
+  // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+  // a single GPRPair reg operand is used in the .td file to replace the two
+  // GPRs. However, when parsing from asm, the two GPRs cannot be automatically
+  // expressed as a GPRPair, so we have to manually merge them.
+  // FIXME: We would really like to be able to tablegen'erate this.
+  if (!isThumb() && Operands.size() > 4 &&
+      (Mnemonic == "ldrexd" || Mnemonic == "strexd")) {
+    bool isLoad = (Mnemonic == "ldrexd");
+    unsigned Idx = isLoad ?
2 : 3; + ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]); + ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]); + + const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID); + // Adjust only if Op1 and Op2 are GPRs. + if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) && + MRC.contains(Op2->getReg())) { + unsigned Reg1 = Op1->getReg(); + unsigned Reg2 = Op2->getReg(); + unsigned Rt = MRI->getEncodingValue(Reg1); + unsigned Rt2 = MRI->getEncodingValue(Reg2); + + // Rt2 must be Rt + 1 and Rt must be even. + if (Rt + 1 != Rt2 || (Rt & 1)) { + Error(Op2->getStartLoc(), isLoad ? + "destination operands must be sequential" : + "source operands must be sequential"); + return true; + } + unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0, + &(MRI->getRegClass(ARM::GPRPairRegClassID))); + Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2); + Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg( + NewReg, Op1->getStartLoc(), Op2->getEndLoc())); + delete Op1; + delete Op2; + } + } + return false; } @@ -5269,8 +5321,7 @@ validateInstruction(MCInst &Inst, switch (Inst.getOpcode()) { case ARM::LDRD: case ARM::LDRD_PRE: - case ARM::LDRD_POST: - case ARM::LDREXD: { + case ARM::LDRD_POST: { // Rt2 must be Rt + 1. unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); @@ -5289,8 +5340,7 @@ validateInstruction(MCInst &Inst, return false; } case ARM::STRD_PRE: - case ARM::STRD_POST: - case ARM::STREXD: { + case ARM::STRD_POST: { // Rt2 must be Rt + 1. unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg()); unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg()); @@ -5665,9 +5715,28 @@ bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { + // Alias for alternate form of 'ADR Rd, #imm' instruction. + case ARM::ADDri: { + if (Inst.getOperand(1).getReg() != ARM::PC || + Inst.getOperand(5).getReg() != 0) + return false; + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADR); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } // Aliases for alternate PC+imm syntax of LDR instructions. case ARM::t2LDRpcrel: - Inst.setOpcode(ARM::t2LDRpci); + // Select the narrow version if the immediate will fit. 
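The check above (`Rt + 1 != Rt2 || (Rt & 1)`) enforces the architectural rule that ldrexd/strexd operate on an even/odd register pair. The same constraint, condensed as a hypothetical predicate over encoding values, not part of the patch:

// Illustrative only: valid pairs are (r0,r1), (r2,r3), ..., never an
// odd-first or non-consecutive pair such as (r1,r2) or (r0,r2).
static bool isValidGPRPair(unsigned Rt, unsigned Rt2) {
  return (Rt & 1) == 0 && Rt2 == Rt + 1;
}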
+ if (Inst.getOperand(1).getImm() > 0 && + Inst.getOperand(1).getImm() <= 0xff) + Inst.setOpcode(ARM::tLDRpci); + else + Inst.setOpcode(ARM::t2LDRpci); return true; case ARM::t2LDRBpcrel: Inst.setOpcode(ARM::t2LDRBpci); @@ -7458,15 +7527,15 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { static const char *getSubtargetFeatureName(unsigned Val); bool ARMAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out) { + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; - unsigned Kind; - unsigned ErrorInfo; unsigned MatchResult; - MatchResult = MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo); + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); switch (MatchResult) { default: break; case Match_Success: @@ -7768,13 +7837,10 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { return true; } -extern "C" void LLVMInitializeARMAsmLexer(); - /// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget); RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget); - LLVMInitializeARMAsmLexer(); } #define GET_REGISTER_MATCHER diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index e24a1b1..d2012c3 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -1,7 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMARMAsmParser - ARMAsmLexer.cpp ARMAsmParser.cpp ) diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index ac916cc..586834c 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -11,7 +11,6 @@ tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel) tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel) tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv) tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info) tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(ARMCommonTableGen) @@ -22,7 +21,6 @@ add_llvm_target(ARMCodeGen ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp - ARMELFWriterInfo.cpp ARMExpandPseudoInsts.cpp ARMFastISel.cpp ARMFrameLowering.cpp @@ -39,6 +37,7 @@ add_llvm_target(ARMCodeGen ARMSubtarget.cpp ARMTargetMachine.cpp ARMTargetObjectFile.cpp + ARMTargetTransformInfo.cpp MLxExpansionPass.cpp Thumb1FrameLowering.cpp Thumb1InstrInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 57642e1..31a3b0b 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -9,21 +9,20 @@ #define DEBUG_TYPE "arm-disassembler" +#include "llvm/MC/MCDisassembler.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "llvm/MC/EDInstInfo.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCExpr.h" +#include "MCTargetDesc/ARMMCExpr.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" 
-#include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <vector> @@ -105,10 +104,6 @@ public: uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const; - - /// getEDInfo - See MCDisassembler. - const EDInstInfo *getEDInfo() const; -private: }; /// ThumbDisassembler - Thumb disassembler for all Thumb platforms. @@ -131,8 +126,6 @@ public: raw_ostream &vStream, raw_ostream &cStream) const; - /// getEDInfo - See MCDisassembler. - const EDInstInfo *getEDInfo() const; private: mutable ITStatus ITBlock; DecodeStatus AddThumbPredicate(MCInst&) const; @@ -385,7 +378,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" -#include "ARMGenEDInfo.inc" static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { return new ARMDisassembler(STI); @@ -395,14 +387,6 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge return new ThumbDisassembler(STI); } -const EDInstInfo *ARMDisassembler::getEDInfo() const { - return instInfoARM; -} - -const EDInstInfo *ThumbDisassembler::getEDInfo() const { - return instInfoARM; -} - DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, const MemoryObject &Region, uint64_t Address, @@ -525,8 +509,9 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, else ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; const char *ReferenceName; - const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, - &ReferenceName); + uint64_t SymbolValue = 0x00000000ffffffffULL & Value; + const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType, + Address, &ReferenceName); if (Name) { SymbolicOp.AddSymbol.Name = Name; SymbolicOp.AddSymbol.Present = true; @@ -1280,7 +1265,13 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, unsigned lsb = fieldFromInstruction(Val, 0, 5); DecodeStatus S = MCDisassembler::Success; - if (lsb > msb) Check(S, MCDisassembler::SoftFail); + if (lsb > msb) { + Check(S, MCDisassembler::SoftFail); + // The check above will cause the warning for the "potentially undefined + // instruction encoding" but we can't build a bad MCOperand value here + // with a lsb > msb or else printing the MCInst will cause a crash. 
+    lsb = msb;
+  }
   uint32_t msb_mask = 0xFFFFFFFF;
   if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
@@ -1523,6 +1514,8 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
       return MCDisassembler::Fail;
     }
     unsigned amt = fieldFromInstruction(Insn, 7, 5);
+    if (Opc == ARM_AM::ror && amt == 0)
+      Opc = ARM_AM::rrx;
     unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);

     Inst.addOperand(MCOperand::CreateImm(imm));
@@ -1564,6 +1557,9 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
       break;
   }

+  if (ShOp == ARM_AM::ror && imm == 0)
+    ShOp = ARM_AM::rrx;
+
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -2089,16 +2085,28 @@ static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,

 static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder) {
-  DecodeStatus S = MCDisassembler::Success;
-  unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) |
-                 (fieldFromInstruction(Insn, 11, 1) << 18) |
-                 (fieldFromInstruction(Insn, 13, 1) << 17) |
-                 (fieldFromInstruction(Insn, 16, 6) << 11) |
-                 (fieldFromInstruction(Insn, 26, 1) << 19);
-  if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
+  DecodeStatus Status = MCDisassembler::Success;
+
+  // Note the J1 and J2 values are from the encoded instruction. So here we
+  // change them to I1 and I2 values, as documented:
+  // I1 = NOT(J1 EOR S);
+  // I2 = NOT(J2 EOR S);
+  // and build the imm32 with one trailing zero as documented:
+  // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+  unsigned S = fieldFromInstruction(Insn, 26, 1);
+  unsigned J1 = fieldFromInstruction(Insn, 13, 1);
+  unsigned J2 = fieldFromInstruction(Insn, 11, 1);
+  unsigned I1 = !(J1 ^ S);
+  unsigned I2 = !(J2 ^ S);
+  unsigned imm10 = fieldFromInstruction(Insn, 16, 10);
+  unsigned imm11 = fieldFromInstruction(Insn, 0, 11);
+  unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11;
+  int imm32 = SignExtend32<24>(tmp << 1);
+  if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
                                 true, 4, Inst, Decoder))
-    Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
-  return S;
+    Inst.addOperand(MCOperand::CreateImm(imm32));
+
+  return Status;
 }

 static DecodeStatus
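The J1/J2 handling above mirrors the branch encoding documented in the ARM ARM: imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32), with I1 = NOT(J1 EOR S) and I2 = NOT(J2 EOR S). A self-contained sketch of that reconstruction (a hypothetical helper, not this patch's code; note the sign bit of the assembled value is S, so the extension must cover all 25 bits):

#include <cstdint>

// Illustrative decode of the Thumb2 B<c>.W branch offset from the raw
// 32-bit instruction, following the documented formula.
static int32_t decodeT2BranchImm(uint32_t Insn) {
  uint32_t S     = (Insn >> 26) & 1;
  uint32_t J1    = (Insn >> 13) & 1;
  uint32_t J2    = (Insn >> 11) & 1;
  uint32_t imm10 = (Insn >> 16) & 0x3FF;
  uint32_t imm11 = Insn & 0x7FF;
  uint32_t I1 = (~(J1 ^ S)) & 1;
  uint32_t I2 = (~(J2 ^ S)) & 1;
  uint32_t imm25 =
      ((S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11) << 1;
  return (int32_t)(imm25 << 7) >> 7; // sign-extend from bit 24 (the S bit)
}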
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 8b9109e..d48b37e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -13,11 +13,11 @@
 #define DEBUG_TYPE "asm-printer"
 #include "ARMInstPrinter.h"
-#include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/MC/MCInst.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/raw_ostream.h"
@@ -29,11 +29,33 @@ using namespace llvm;
 ///
 /// getSORegOffset returns an integer from 0-31, representing '32' as 0.
 static unsigned translateShiftImm(unsigned imm) {
+  // lsr #32 and asr #32 exist, but should be encoded as a 0.
+  assert((imm & ~0x1f) == 0 && "Invalid shift encoding");
+
   if (imm == 0)
     return 32;
   return imm;
 }

+/// Prints the shift value with an immediate value.
+static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
+                             unsigned ShImm, bool UseMarkup) {
+  if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
+    return;
+  O << ", ";
+
+  assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
+  O << getShiftOpcStr(ShOpc);
+
+  if (ShOpc != ARM_AM::rrx) {
+    O << " ";
+    if (UseMarkup)
+      O << "<imm:";
+    O << "#" << translateShiftImm(ShImm);
+    if (UseMarkup)
+      O << ">";
+  }
+}

 ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
                                const MCInstrInfo &MII,
@@ -45,7 +67,9 @@ ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
 }

 void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << getRegisterName(RegNo);
+  OS << markup("<reg:")
+     << getRegisterName(RegNo)
+     << markup(">");
 }

 void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -85,10 +109,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
       printSBitModifierOperand(MI, 6, O);
       printPredicateOperand(MI, 4, O);

-      O << '\t' << getRegisterName(Dst.getReg())
-        << ", " << getRegisterName(MO1.getReg());
+      O << '\t';
+      printRegName(O, Dst.getReg());
+      O << ", ";
+      printRegName(O, MO1.getReg());

-      O << ", " << getRegisterName(MO2.getReg());
+      O << ", ";
+      printRegName(O, MO2.getReg());
       assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
       printAnnotation(O, Annot);
       return;
@@ -104,15 +131,20 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
       printSBitModifierOperand(MI, 5, O);
       printPredicateOperand(MI, 3, O);

-      O << '\t' << getRegisterName(Dst.getReg())
-        << ", " << getRegisterName(MO1.getReg());
+      O << '\t';
+      printRegName(O, Dst.getReg());
+      O << ", ";
+      printRegName(O, MO1.getReg());

       if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
         printAnnotation(O, Annot);
         return;
       }

-      O << ", #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+      O << ", "
+        << markup("<imm:")
+        << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()))
+        << markup(">");
       printAnnotation(O, Annot);
       return;
     }
@@ -136,7 +168,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
         MI->getOperand(3).getImm() == -4) {
       O << '\t' << "push";
       printPredicateOperand(MI, 4, O);
-      O << "\t{" << getRegisterName(MI->getOperand(1).getReg()) << "}";
+      O << "\t{";
+      printRegName(O, MI->getOperand(1).getReg());
+      O << "}";
       printAnnotation(O, Annot);
       return;
     }
@@ -159,7 +193,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
         MI->getOperand(4).getImm() == 4) {
       O << '\t' << "pop";
       printPredicateOperand(MI, 5, O);
-      O << "\t{" << getRegisterName(MI->getOperand(0).getReg()) << "}";
+      O << "\t{";
+      printRegName(O, MI->getOperand(0).getReg());
+      O << "}";
       printAnnotation(O, Annot);
       return;
     }
@@ -198,7 +234,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,

     O << "\tldm";
     printPredicateOperand(MI, 1, O);
-    O << '\t' << getRegisterName(BaseReg);
+    O << '\t';
+    printRegName(O, BaseReg);
     if (Writeback) O << "!";
     O << ", ";
     printRegisterList(MI, 3, O);
@@ -215,6 +252,35 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
     return;
   }
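The markup() calls threaded through the printer wrap operands in tags such as <reg:r0> and <imm:#4> when markup output is enabled, and print nothing extra otherwise. A hypothetical reduction of that pattern (the real markup() helper is provided by the MCInstPrinter base class):

#include "llvm/Support/raw_ostream.h"

// Illustrative only: print an immediate with optional markup tags, the
// shape that printOperand and printRegImmShift follow above.
static void printImmWithMarkup(llvm::raw_ostream &OS, int64_t Imm,
                               bool UseMarkup) {
  if (UseMarkup)
    OS << "<imm:";
  OS << '#' << Imm;
  if (UseMarkup)
    OS << '>';
}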
+  // Combine 2 GPRs from the disassembler into a GPRPair to match with instr def.
+  // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+  // a single GPRPair reg operand is used in the .td file to replace the two
+  // GPRs. However, when decoding them, the two GPRs cannot be automatically
+  // expressed as a GPRPair, so we have to manually merge them.
+  // FIXME: We would really like to be able to tablegen'erate this.
+  if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) {
+    const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
+    bool isStore = Opcode == ARM::STREXD;
+    unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
+    if (MRC.contains(Reg)) {
+      MCInst NewMI;
+      MCOperand NewReg;
+      NewMI.setOpcode(Opcode);
+
+      if (isStore)
+        NewMI.addOperand(MI->getOperand(0));
+      NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0,
+        &MRI.getRegClass(ARM::GPRPairRegClassID)));
+      NewMI.addOperand(NewReg);
+
+      // Copy the remaining operands into NewMI.
+      for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+        NewMI.addOperand(MI->getOperand(i));
+      printInstruction(&NewMI, O);
+      return;
+    }
+  }
+
   printInstruction(MI, O);
   printAnnotation(O, Annot);
 }
@@ -224,9 +290,11 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   const MCOperand &Op = MI->getOperand(OpNo);
   if (Op.isReg()) {
     unsigned Reg = Op.getReg();
-    O << getRegisterName(Reg);
+    printRegName(O, Reg);
   } else if (Op.isImm()) {
-    O << '#' << Op.getImm();
+    O << markup("<imm:")
+      << '#' << formatImm(Op.getImm())
+      << markup(">");
   } else {
     assert(Op.isExpr() && "unknown operand kind in printOperand");
     // If a symbolic branch target was added as a constant expression then print
@@ -244,13 +312,16 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   }
 }

-void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum,
-                                            raw_ostream &O) {
+void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+                                               raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(OpNum);
   if (MO1.isExpr())
     O << *MO1.getExpr();
-  else if (MO1.isImm())
-    O << "[pc, #" << MO1.getImm() << "]";
+  else if (MO1.isImm()) {
+    O << markup("<mem:") << "[pc, "
+      << markup("<imm:") << "#" << formatImm(MO1.getImm())
+      << markup(">]>", "]");
+  }
   else
     llvm_unreachable("Unknown LDR label operand?");
 }
@@ -266,7 +337,7 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
   const MCOperand &MO2 = MI->getOperand(OpNum+1);
   const MCOperand &MO3 = MI->getOperand(OpNum+2);

-  O << getRegisterName(MO1.getReg());
+  printRegName(O, MO1.getReg());

   // Print the shift opc.
   ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
@@ -274,7 +345,8 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
   if (ShOpc == ARM_AM::rrx)
     return;

-  O << ' ' << getRegisterName(MO2.getReg());
+  O << ' ';
+  printRegName(O, MO2.getReg());
   assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
 }

@@ -283,14 +355,11 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
   const MCOperand &MO1 = MI->getOperand(OpNum);
   const MCOperand &MO2 = MI->getOperand(OpNum+1);

-  O << getRegisterName(MO1.getReg());
+  printRegName(O, MO1.getReg());

   // Print the shift opc.
-  ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
-  O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
-  if (ShOpc == ARM_AM::rrx)
-    return;
-  O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()));
+  printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
+                   ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
 }

@@ -304,67 +373,51 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
   const MCOperand &MO2 = MI->getOperand(Op+1);
   const MCOperand &MO3 = MI->getOperand(Op+2);

-  O << "[" << getRegisterName(MO1.getReg());
+  O << markup("<mem:") << "[";
+  printRegName(O, MO1.getReg());

   if (!MO2.getReg()) {
-    if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
- O << ", #" + if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0. + O << ", " + << markup("<imm:") + << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << ARM_AM::getAM2Offset(MO3.getImm()); - O << "]"; - return; - } - - O << ", " - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << getRegisterName(MO2.getReg()); - - if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) - << " #" << ShImm; - O << "]"; -} - -void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); - - O << "[" << getRegisterName(MO1.getReg()) << "], "; - - if (!MO2.getReg()) { - unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm()); - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << ImmOffs; + << ARM_AM::getAM2Offset(MO3.getImm()) + << markup(">"); + } + O << "]" << markup(">"); return; } - O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << getRegisterName(MO2.getReg()); + O << ", "; + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())); + printRegName(O, MO2.getReg()); - if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) - << " #" << ShImm; + printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()), + ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup); + O << "]" << markup(">"); } void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); - O << "[" << getRegisterName(MO1.getReg()) << ", " - << getRegisterName(MO2.getReg()) << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + O << ", "; + printRegName(O, MO2.getReg()); + O << "]" << markup(">"); } void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); - O << "[" << getRegisterName(MO1.getReg()) << ", " - << getRegisterName(MO2.getReg()) << ", lsl #1]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + O << ", "; + printRegName(O, MO2.getReg()); + O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">"); } void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, @@ -376,13 +429,13 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, return; } +#ifndef NDEBUG const MCOperand &MO3 = MI->getOperand(Op+2); unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm()); + assert(IdxMode != ARMII::IndexModePost && + "Should be pre or offset index op"); +#endif - if (IdxMode == ARMII::IndexModePost) { - printAM2PostIndexOp(MI, Op, O); - return; - } printAM2PreOrOffsetIndexOp(MI, Op, O); } @@ -394,19 +447,18 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << ImmOffs; + O << markup("<imm:") + << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << ImmOffs + << markup(">"); return; } - O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << getRegisterName(MO1.getReg()); + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())); + printRegName(O, MO1.getReg()); - if 
(unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) - << " #" << ShImm; + printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO2.getImm()), + ARM_AM::getAM2Offset(MO2.getImm()), UseMarkup); } //===--------------------------------------------------------------------===// @@ -419,18 +471,22 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - O << "[" << getRegisterName(MO1.getReg()) << "], "; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + O << "], " << markup(">"); if (MO2.getReg()) { - O << (char)ARM_AM::getAM3Op(MO3.getImm()) - << getRegisterName(MO2.getReg()); + O << (char)ARM_AM::getAM3Op(MO3.getImm()); + printRegName(O, MO2.getReg()); return; } unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); - O << '#' + O << markup("<imm:") + << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << ImmOffs; + << ImmOffs + << markup(">"); } void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, @@ -439,23 +495,29 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - O << '[' << getRegisterName(MO1.getReg()); + O << markup("<mem:") << '['; + printRegName(O, MO1.getReg()); if (MO2.getReg()) { - O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << getRegisterName(MO2.getReg()) << ']'; + O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())); + printRegName(O, MO2.getReg()); + O << ']' << markup(">"); return; } - //If the op is sub we have to print the immediate even if it is 0 + //If the op is sub we have to print the immediate even if it is 0 unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); - - if (ImmOffs || (op == ARM_AM::sub)) - O << ", #" + + if (ImmOffs || (op == ARM_AM::sub)) { + O << ", " + << markup("<imm:") + << "#" << ARM_AM::getAddrOpcStr(op) - << ImmOffs; - O << ']'; + << ImmOffs + << markup(">"); + } + O << ']' << markup(">"); } void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, @@ -483,15 +545,15 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, const MCOperand &MO2 = MI->getOperand(OpNum+1); if (MO1.getReg()) { - O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) - << getRegisterName(MO1.getReg()); + O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())); + printRegName(O, MO1.getReg()); return; } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) - << ImmOffs; + O << markup("<imm:") + << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs + << markup(">"); } void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, @@ -499,7 +561,9 @@ void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff); + O << markup("<imm:") + << '#' << ((Imm & 256) ? 
"" : "-") << (Imm & 0xff) + << markup(">"); } void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, @@ -507,7 +571,8 @@ void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - O << (MO2.getImm() ? "" : "-") << getRegisterName(MO1.getReg()); + O << (MO2.getImm() ? "" : "-"); + printRegName(O, MO1.getReg()); } void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, @@ -515,7 +580,9 @@ void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2); + O << markup("<imm:") + << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2) + << markup(">"); } @@ -536,16 +603,20 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, return; } - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); if (ImmOffs || Op == ARM_AM::sub) { - O << ", #" + O << ", " + << markup("<imm:") + << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) - << ImmOffs * 4; + << ImmOffs * 4 + << markup(">"); } - O << "]"; + O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, @@ -553,18 +624,21 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. O << ", :" << (MO2.getImm() << 3); } - O << "]"; + O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - O << "[" << getRegisterName(MO1.getReg()) << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, @@ -573,8 +647,10 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, const MCOperand &MO = MI->getOperand(OpNum); if (MO.getReg() == 0) O << "!"; - else - O << ", " << getRegisterName(MO.getReg()); + else { + O << ", "; + printRegName(O, MO.getReg()); + } } void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, @@ -585,7 +661,9 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, int32_t lsb = CountTrailingZeros_32(v); int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); - O << '#' << lsb << ", #" << width; + O << markup("<imm:") << '#' << lsb << markup(">") + << ", " + << markup("<imm:") << '#' << width << markup(">"); } void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, @@ -599,10 +677,18 @@ void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, unsigned ShiftOp = MI->getOperand(OpNum).getImm(); bool isASR = (ShiftOp & (1 << 5)) != 0; unsigned Amt = ShiftOp & 0x1f; - if (isASR) - O << ", asr #" << (Amt == 0 ? 32 : Amt); - else if (Amt) - O << ", lsl #" << Amt; + if (isASR) { + O << ", asr " + << markup("<imm:") + << "#" << (Amt == 0 ? 
32 : Amt) + << markup(">"); + } + else if (Amt) { + O << ", lsl " + << markup("<imm:") + << "#" << Amt + << markup(">"); + } } void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, @@ -611,7 +697,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, if (Imm == 0) return; assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!"); - O << ", lsl #" << Imm; + O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">"); } void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, @@ -621,7 +707,7 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, if (Imm == 0) Imm = 32; assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!"); - O << ", asr #" << Imm; + O << ", asr " << markup("<imm:") << "#" << Imm << markup(">"); } void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, @@ -629,11 +715,20 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, O << "{"; for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { if (i != OpNum) O << ", "; - O << getRegisterName(MI->getOperand(i).getReg()); + printRegName(O, MI->getOperand(i).getReg()); } O << "}"; } +void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0)); + O << ", "; + printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1)); +} + + void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -803,23 +898,29 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, int32_t OffImm = (int32_t)MO.getImm(); + O << markup("<imm:"); if (OffImm == INT32_MIN) O << "#-0"; else if (OffImm < 0) O << "#-" << -OffImm; else O << "#" << OffImm; + O << markup(">"); } void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "#" << MI->getOperand(OpNum).getImm() * 4; + O << markup("<imm:") + << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4) + << markup(">"); } void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << "#" << (Imm == 0 ? 32 : Imm); + O << markup("<imm:") + << "#" << formatImm((Imm == 0 ? 
32 : Imm)) + << markup(">"); } void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, @@ -849,10 +950,13 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, return; } - O << "[" << getRegisterName(MO1.getReg()); - if (unsigned RegNum = MO2.getReg()) - O << ", " << getRegisterName(RegNum); - O << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + if (unsigned RegNum = MO2.getReg()) { + O << ", "; + printRegName(O, RegNum); + } + O << "]" << markup(">"); } void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, @@ -867,10 +971,15 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, return; } - O << "[" << getRegisterName(MO1.getReg()); - if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs * Scale; - O << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + if (unsigned ImmOffs = MO2.getImm()) { + O << ", " + << markup("<imm:") + << "#" << formatImm(ImmOffs * Scale) + << markup(">"); + } + O << "]" << markup(">"); } void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI, @@ -906,14 +1015,12 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, const MCOperand &MO2 = MI->getOperand(OpNum+1); unsigned Reg = MO1.getReg(); - O << getRegisterName(Reg); + printRegName(O, Reg); // Print the shift opc. assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); - O << ", " << ARM_AM::getShiftOpcStr(ShOpc); - if (ShOpc != ARM_AM::rrx) - O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); + printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()), + ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup); } void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, @@ -926,18 +1033,27 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, return; } - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); bool isSub = OffImm < 0; // Special value for #-0. All others are normal. if (OffImm == INT32_MIN) OffImm = 0; - if (isSub) - O << ", #-" << -OffImm; - else if (OffImm > 0) - O << ", #" << OffImm; - O << "]"; + if (isSub) { + O << ", " + << markup("<imm:") + << "#-" << -OffImm + << markup(">"); + } + else if (OffImm > 0) { + O << ", " + << markup("<imm:") + << "#" << OffImm + << markup(">"); + } + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, @@ -946,17 +1062,24 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); // Don't print +0. 
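One convention recurring in these offset printers is worth spelling out before the marked-up rewrites below: the encodings distinguish an add of zero from a subtract of zero, and since -0 has no int32_t representation, INT32_MIN serves as the sentinel for "subtract, offset 0". A condensed sketch of the shared logic (not a function in this patch):

    static void printOffsetImm(raw_ostream &O, int32_t OffImm) {
      if (OffImm == INT32_MIN)
        O << ", #-0";            // subtract form with zero offset
      else if (OffImm < 0)
        O << ", #-" << -OffImm;  // ordinary negative offset
      else if (OffImm > 0)
        O << ", #" << OffImm;    // positive offset; a plain +0 is omitted
    }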
+ if (OffImm != 0) + O << ", "; + if (OffImm != 0 && UseMarkup) + O << "<imm:"; if (OffImm == INT32_MIN) - O << ", #-0"; + O << "#-0"; else if (OffImm < 0) - O << ", #-" << -OffImm; + O << "#-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm; - O << "]"; + O << "#" << OffImm; + if (OffImm != 0 && UseMarkup) + O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, @@ -970,20 +1093,27 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, return; } - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); // Don't print +0. + if (OffImm != 0) + O << ", "; + if (OffImm != 0 && UseMarkup) + O << "<imm:"; if (OffImm == INT32_MIN) - O << ", #-0"; + O << "#-0"; else if (OffImm < 0) - O << ", #-" << -OffImm; + O << "#-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm; - O << "]"; + O << "#" << OffImm; + if (OffImm != 0 && UseMarkup) + O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, @@ -992,10 +1122,15 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - O << "[" << getRegisterName(MO1.getReg()); - if (MO2.getImm()) - O << ", #" << MO2.getImm() * 4; - O << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + if (MO2.getImm()) { + O << ", " + << markup("<imm:") + << "#" << formatImm(MO2.getImm() * 4) + << markup(">"); + } + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, @@ -1003,11 +1138,12 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); - // Don't print +0. + O << ", " << markup("<imm:"); if (OffImm < 0) - O << ", #-" << -OffImm; + O << "#-" << -OffImm; else - O << ", #" << OffImm; + O << "#" << OffImm; + O << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, @@ -1019,12 +1155,18 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); // Don't print +0. 
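The hunks around this point test UseMarkup by hand because the <imm:> tag must be opened only when an offset is actually printed. Assuming the markup() helper used in the earlier hunks, an equivalent formulation of the same logic would be:

    if (OffImm != 0) {
      O << ", " << markup("<imm:");
      if (OffImm == INT32_MIN)
        O << "#-0";
      else if (OffImm < 0)
        O << "#-" << -OffImm;
      else
        O << "#" << OffImm;
      O << markup(">");
    }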
+ if (OffImm != 0) + O << ", "; + if (OffImm != 0 && UseMarkup) + O << "<imm:"; if (OffImm == INT32_MIN) - O << ", #-0"; + O << "#-0"; else if (OffImm < 0) - O << ", #-" << -OffImm; + O << "#-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm; + O << "#" << OffImm; + if (OffImm != 0 && UseMarkup) + O << ">"; } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, @@ -1034,23 +1176,30 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, const MCOperand &MO2 = MI->getOperand(OpNum+1); const MCOperand &MO3 = MI->getOperand(OpNum+2); - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); assert(MO2.getReg() && "Invalid so_reg load / store address!"); - O << ", " << getRegisterName(MO2.getReg()); + O << ", "; + printRegName(O, MO2.getReg()); unsigned ShAmt = MO3.getImm(); if (ShAmt) { assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); - O << ", lsl #" << ShAmt; + O << ", lsl " + << markup("<imm:") + << "#" << ShAmt + << markup(">"); } - O << "]"; + O << "]" << markup(">"); } void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); - O << '#' << ARM_AM::getFPImmFloat(MO.getImm()); + O << markup("<imm:") + << '#' << ARM_AM::getFPImmFloat(MO.getImm()) + << markup(">"); } void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, @@ -1058,14 +1207,18 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - O << "#0x"; + O << markup("<imm:") + << "#0x"; O.write_hex(Val); + O << markup(">"); } void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << "#" << Imm + 1; + O << markup("<imm:") + << "#" << formatImm(Imm + 1) + << markup(">"); } void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, @@ -1073,23 +1226,30 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 0) return; - O << ", ror #"; + O << ", ror " + << markup("<imm:") + << "#"; switch (Imm) { default: assert (0 && "illegal ror immediate!"); case 1: O << "8"; break; case 2: O << "16"; break; case 3: O << "24"; break; } + O << markup(">"); } void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "#" << 16 - MI->getOperand(OpNum).getImm(); + O << markup("<imm:") + << "#" << 16 - MI->getOperand(OpNum).getImm() + << markup(">"); } void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "#" << 32 - MI->getOperand(OpNum).getImm(); + O << markup("<imm:") + << "#" << 32 - MI->getOperand(OpNum).getImm() + << markup(">"); } void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, @@ -1099,7 +1259,9 @@ void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "}"; } void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, @@ -1107,7 +1269,11 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, unsigned Reg = MI->getOperand(OpNum).getReg(); 
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); - O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; + O << "{"; + printRegName(O, Reg0); + O << ", "; + printRegName(O, Reg1); + O << "}"; } void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, @@ -1116,7 +1282,11 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); - O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; + O << "{"; + printRegName(O, Reg0); + O << ", "; + printRegName(O, Reg1); + O << "}"; } void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, @@ -1124,9 +1294,13 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 1); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "}"; } void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, @@ -1134,16 +1308,23 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 1); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 3); + O << "}"; } void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[]}"; } void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, @@ -1152,7 +1333,11 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); - O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}"; + O << "{"; + printRegName(O, Reg0); + O << "[], "; + printRegName(O, Reg1); + O << "[]}"; } void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, @@ -1161,9 +1346,13 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. 
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 1); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "[]}"; } void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, @@ -1172,10 +1361,15 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 1); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 3); + O << "[]}"; } void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, @@ -1184,7 +1378,11 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); - O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}"; + O << "{"; + printRegName(O, Reg0); + O << "[], "; + printRegName(O, Reg1); + O << "[]}"; } void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, @@ -1193,9 +1391,13 @@ void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 4); + O << "[]}"; } void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, @@ -1204,10 +1406,15 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. 
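The caveat repeated in these vector-list printers deserves a worked example. The tablegen'erated enum lists D0 through D31 consecutively, so adding the list stride to the enum value walks the register file; a sketch with an assumed base register:

    unsigned Base = ARM::D8;   // hypothetical operand value
    printRegName(O, Base);     // d8
    printRegName(O, Base + 2); // d10 (spaced list, stride 2)
    printRegName(O, Base + 4); // d12
    // Safe only because the D<n> registers are declared in sorted order;
    // enum arithmetic on arbitrary register classes is not.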
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 4); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 6); + O << "[]}"; } void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, @@ -1216,9 +1423,13 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 4); + O << "}"; } void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, @@ -1227,8 +1438,13 @@ void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 4); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 6); + O << "}"; } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 73d7bfd..edff75d 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -124,9 +124,11 @@ public: void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT new file mode 100755 index 0000000..68afea1 --- /dev/null +++ b/lib/Target/ARM/LICENSE.TXT @@ -0,0 +1,47 @@ +ARM Limited + +Software Grant License Agreement ("Agreement") + +Except for the license granted herein to you, ARM Limited 
("ARM") reserves all +right, title, and interest in and to the Software (defined below). + +Definition + +"Software" means the code and documentation as well as any original work of +authorship, including any modifications or additions to an existing work, that +is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for +inclusion in, or documentation of, any of the products owned or managed by LLVM +(the "Work"). For the purposes of this definition, "submitted" means any form of +electronic, verbal, or written communication sent to LLVM or its +representatives, including but not limited to communication on electronic +mailing lists, source code control systems, and issue tracking systems that are +managed by, or on behalf of, LLVM for the purpose of discussing and improving +the Work, but excluding communication that is conspicuously marked otherwise. + +1. Grant of Copyright License. Subject to the terms and conditions of this + Agreement, ARM hereby grants to you and to recipients of the Software + distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, + royalty-free, irrevocable copyright license to reproduce, prepare derivative + works of, publicly display, publicly perform, sublicense, and distribute the + Software and such derivative works. + +2. Grant of Patent License. Subject to the terms and conditions of this + Agreement, ARM hereby grants you and to recipients of the Software + distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, + royalty-free, irrevocable (except as stated in this section) patent license + to make, have made, use, offer to sell, sell, import, and otherwise transfer + the Work, where such license applies only to those patent claims licensable + by ARM that are necessarily infringed by ARM's Software alone or by + combination of the Software with the Work to which such Software was + submitted. If any entity institutes patent litigation against ARM or any + other entity (including a cross-claim or counterclaim in a lawsuit) alleging + that ARM's Software, or the Work to which ARM has contributed constitutes + direct or contributory patent infringement, then any patent licenses granted + to that entity under this Agreement for the Software or Work shall terminate + as of the date such litigation is filed. + +Unless required by applicable law or agreed to in writing, the software is +provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +either express or implied, including, without limitation, any warranties or +conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index ac6ce64..1f1b334 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -8,9 +8,10 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" -#include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" @@ -114,11 +114,15 @@ public: MCValue &Target, uint64_t &Value, bool &IsResolved); + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + bool mayNeedRelaxation(const MCInst &Inst) const; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const; void relaxInstruction(const MCInst &Inst, MCInst &Res) const; @@ -161,7 +165,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { switch ((unsigned)Fixup.getKind()) { case ARM::fixup_arm_thumb_br: { @@ -216,7 +220,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP - const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0 + const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP if (isThumb()) { const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding @@ -552,63 +556,6 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, (void)adjustFixupValue(Fixup, Value, &Asm.getContext()); } -namespace { - -// FIXME: This should be in a separate file. -// ELF is an ELF of course... -class ELFARMAsmBackend : public ARMAsmBackend { -public: - uint8_t OSABI; - ELFARMAsmBackend(const Target &T, const StringRef TT, - uint8_t _OSABI) - : ARMAsmBackend(T, TT), OSABI(_OSABI) { } - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const; - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARMELFObjectWriter(OS, OSABI); - } -}; - -// FIXME: Raise this to share code between Darwin and ELF. -void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value) const { - unsigned NumBytes = 4; // FIXME: 2 for Thumb - Value = adjustFixupValue(Fixup, Value); - if (!Value) return; // Doesn't change encoding. - - unsigned Offset = Fixup.getOffset(); - - // For each byte of the fragment that the fixup touches, mask in the bits from - // the fixup value. The Value has been "split up" into the appropriate - // bitfields above. 
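Whether in the ELF-specific copy deleted above or in the shared ARMAsmBackend::applyFixup this hunk introduces, the patching step is the same byte-wise OR. Annotated for clarity (assuming the little-endian instruction stream these writers emit):

    for (unsigned i = 0; i != NumBytes; ++i)
      Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
    // adjustFixupValue has already packed Value into the instruction's
    // bitfields, so each byte can be OR'd in without clearing any bits
    // that belong to neighboring fields.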
- for (unsigned i = 0; i != NumBytes; ++i) - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); -} - -// FIXME: This should be in a separate file. -class DarwinARMAsmBackend : public ARMAsmBackend { -public: - const object::mach::CPUSubtypeARM Subtype; - DarwinARMAsmBackend(const Target &T, const StringRef TT, - object::mach::CPUSubtypeARM st) - : ARMAsmBackend(T, TT), Subtype(st) { } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARMMachObjectWriter(OS, /*Is64Bit=*/false, - object::mach::CTM_ARM, - Subtype); - } - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const; - - virtual bool doesSectionRequireSymbols(const MCSection &Section) const { - return false; - } -}; - /// getFixupKindNumBytes - The number of bytes the fixup may change. static unsigned getFixupKindNumBytes(unsigned Kind) { switch (Kind) { @@ -657,8 +604,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { } } -void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value) const { +void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); Value = adjustFixupValue(Fixup, Value); if (!Value) return; // Doesn't change encoding. @@ -666,15 +613,53 @@ void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned Offset = Fixup.getOffset(); assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); - // For each byte of the fragment that the fixup touches, mask in the - // bits from the fixup value. + // For each byte of the fragment that the fixup touches, mask in the bits from + // the fixup value. The Value has been "split up" into the appropriate + // bitfields above. for (unsigned i = 0; i != NumBytes; ++i) Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); } +namespace { + +// FIXME: This should be in a separate file. +// ELF is an ELF of course... +class ELFARMAsmBackend : public ARMAsmBackend { +public: + uint8_t OSABI; + ELFARMAsmBackend(const Target &T, const StringRef TT, + uint8_t _OSABI) + : ARMAsmBackend(T, TT), OSABI(_OSABI) { } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createARMELFObjectWriter(OS, OSABI); + } +}; + +// FIXME: This should be in a separate file. 
+class DarwinARMAsmBackend : public ARMAsmBackend { +public: + const object::mach::CPUSubtypeARM Subtype; + DarwinARMAsmBackend(const Target &T, const StringRef TT, + object::mach::CPUSubtypeARM st) + : ARMAsmBackend(T, TT), Subtype(st) { + HasDataInCodeSupport = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createARMMachObjectWriter(OS, /*Is64Bit=*/false, + object::mach::CTM_ARM, + Subtype); + } + + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } +}; + } // end anonymous namespace -MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) { +MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { @@ -687,6 +672,15 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) { else if (TheTriple.getArchName() == "armv6" || TheTriple.getArchName() == "thumbv6") return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); + else if (TheTriple.getArchName() == "armv7f" || + TheTriple.getArchName() == "thumbv7f") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F); + else if (TheTriple.getArchName() == "armv7k" || + TheTriple.getArchName() == "thumbv7k") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K); + else if (TheTriple.getArchName() == "armv7s" || + TheTriple.getArchName() == "thumbv7s") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S); return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index b53da3b..9193e40 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -133,6 +133,7 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, switch (RelocType) { default: EmitThisSym = true; break; case ELF::R_ARM_ABS32: EmitThisSym = false; break; + case ELF::R_ARM_PREL31: EmitThisSym = false; break; } } @@ -225,6 +226,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case FK_Data_4: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_ARM_NONE: + Type = ELF::R_ARM_NONE; + break; case MCSymbolRefExpr::VK_ARM_GOT: Type = ELF::R_ARM_GOT_BREL; break; @@ -246,7 +250,13 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_TARGET1: Type = ELF::R_ARM_TARGET1; break; - } + case MCSymbolRefExpr::VK_ARM_TARGET2: + Type = ELF::R_ARM_TARGET2; + break; + case MCSymbolRefExpr::VK_ARM_PREL31: + Type = ELF::R_ARM_PREL31; + break; + } break; case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp 
b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp new file mode 100644 index 0000000..39ded8f --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -0,0 +1,201 @@ +//===- lib/MC/ARMELFStreamer.cpp - ELF Object Output for ARM --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ARM ELF .o object files. Different +// from generic ELF streamer in emitting mapping symbols ($a, $t and $d) to +// delimit regions of data and code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +/// Extend the generic ELFStreamer class so that it can emit mapping symbols at +/// the appropriate points in the object files. These symbols are defined in the +/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf. +/// +/// In brief: $a, $t or $d should be emitted at the start of each contiguous +/// region of ARM code, Thumb code or data in a section. In practice, this +/// emission does not rely on explicit assembler directives but on inherent +/// properties of the directives doing the emission (e.g. ".byte" is data, "add +/// r0, r0, r0" an instruction). +/// +/// As a result this system is orthogonal to the DataRegion infrastructure used +/// by MachO. Beware! +class ARMELFStreamer : public MCELFStreamer { +public: + ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb) + : MCELFStreamer(Context, TAB, OS, Emitter), + IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { + } + + ~ARMELFStreamer() {} + + virtual void ChangeSection(const MCSection *Section) { + // We have to keep track of the mapping symbol state of any sections we + // use. Each one should start off as EMS_None, which is provided as the + // default constructor by DenseMap::lookup. + LastMappingSymbols[getPreviousSection()] = LastEMS; + LastEMS = LastMappingSymbols.lookup(Section); + + MCELFStreamer::ChangeSection(Section); + } + + /// This function is the one used to emit instruction data into the ELF + /// streamer. We override it to add the appropriate mapping symbol if + /// necessary. + virtual void EmitInstruction(const MCInst& Inst) { + if (IsThumb) + EmitThumbMappingSymbol(); + else + EmitARMMappingSymbol(); + + MCELFStreamer::EmitInstruction(Inst); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if + /// necessary. 
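Condensed into one sketch, the mapping-symbol discipline this class implements is (hypothetical helper; the real enum and emission appear further down, and each symbol is a local, untyped label such as "$d.3"):

    void noteRegion(ElfMappingSymbol &LastEMS, ElfMappingSymbol Now) {
      if (LastEMS == Now)
        return;               // still in the same kind of region
      // EmitMappingSymbol("$a"), ("$t") or ("$d") is called here.
      LastEMS = Now;
    }

The per-section bookkeeping in ChangeSection above exists so that re-entering a section resumes with the region kind it last ended in.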
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitBytes(Data, AddrSpace); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if + /// necessary. + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { + MCELFStreamer::EmitAssemblerFlag(Flag); + + switch (Flag) { + case MCAF_SyntaxUnified: + return; // no-op here. + case MCAF_Code16: + IsThumb = true; + return; // Change to Thumb mode + case MCAF_Code32: + IsThumb = false; + return; // Change to ARM mode + case MCAF_Code64: + return; + case MCAF_SubsectionsViaSymbols: + return; + } + } + +private: + enum ElfMappingSymbol { + EMS_None, + EMS_ARM, + EMS_Thumb, + EMS_Data + }; + + void EmitDataMappingSymbol() { + if (LastEMS == EMS_Data) return; + EmitMappingSymbol("$d"); + LastEMS = EMS_Data; + } + + void EmitThumbMappingSymbol() { + if (LastEMS == EMS_Thumb) return; + EmitMappingSymbol("$t"); + LastEMS = EMS_Thumb; + } + + void EmitARMMappingSymbol() { + if (LastEMS == EMS_ARM) return; + EmitMappingSymbol("$a"); + LastEMS = EMS_ARM; + } + + void EmitMappingSymbol(StringRef Name) { + MCSymbol *Start = getContext().CreateTempSymbol(); + EmitLabel(Start); + + MCSymbol *Symbol = + getContext().GetOrCreateSymbol(Name + "." + + Twine(MappingSymbolCounter++)); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + MCELF::SetType(SD, ELF::STT_NOTYPE); + MCELF::SetBinding(SD, ELF::STB_LOCAL); + SD.setExternal(false); + Symbol->setSection(*getCurrentSection()); + + const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + Symbol->setVariableValue(Value); + } + + void EmitThumbFunc(MCSymbol *Func) { + // FIXME: Anything needed here to flag the function as thumb? + + getAssembler().setIsThumbFunc(Func); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func); + SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc); + } + + + bool IsThumb; + int64_t MappingSymbolCounter; + + DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; + ElfMappingSymbol LastEMS; + + /// @} +}; +} + +namespace llvm { + MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack, + bool IsThumb) { + ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + if (NoExecStack) + S->getAssembler().setNoExecStack(true); + return S; + } + +} + + diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h new file mode 100644 index 0000000..77ae5d2 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h @@ -0,0 +1,27 @@ +//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF streamer information for the ARM backend. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARM_ELF_STREAMER_H +#define ARM_ELF_STREAMER_H + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + + MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack, + bool IsThumb); +} + +#endif // ARM_ELF_STREAMER_H diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 1917564..09e15af 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -24,8 +26,6 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -35,8 +35,8 @@ STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created."); namespace { class ARMMCCodeEmitter : public MCCodeEmitter { - ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT + ARMMCCodeEmitter(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCSubtargetInfo &STI; const MCContext &CTX; @@ -934,6 +934,10 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm()); unsigned SBits = getShiftOp(ShOp); + // While "lsr #32" and "asr #32" exist, they are encoded with a 0 in the shift + // amount. However, it would be an easy mistake to make so check here. 
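// Concretely: the imm5 shift field encodes "lsr #32"/"asr #32" as 0, so a
// correct caller passes ShImm == 0 (never 32) for a 32-bit shift; a raw 32
// reaching this point would overflow the 5-bit field, which is exactly the
// mistake the assert below catches.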
+ assert((ShImm & ~0x1f) == 0 && "Out of range shift amount"); + // {16-13} = Rn // {12} = isAdd // {11-0} = shifter diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 22e14a2..fc8505b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -9,8 +9,8 @@ #define DEBUG_TYPE "armmcexpr" #include "ARMMCExpr.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" using namespace llvm; const ARMMCExpr* diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index a727e08..b404e6c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -28,7 +28,7 @@ private: explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr) : Kind(_Kind), Expr(_Expr) {} - + public: /// @name Construction /// @{ @@ -67,9 +67,6 @@ public: static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; } - - static bool classof(const ARMMCExpr *) { return true; } - }; } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 5df84c8..f4958f3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -12,8 +12,9 @@ //===----------------------------------------------------------------------===// #include "ARMMCTargetDesc.h" -#include "ARMMCAsmInfo.h" #include "ARMBaseInfo.h" +#include "ARMELFStreamer.h" +#include "ARMMCAsmInfo.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrAnalysis.h" @@ -71,6 +72,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { else // Use CPU to figure out the exact features. ARMArchFeature = "+v7"; + } else if (Len >= Idx+2 && TT[Idx+1] == 's') { + if (NoCPU) + // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk + // Swift + ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; } else { // v7 CPUs have lots of different feature sets. If no CPU is specified, // then assume v7a (e.g. cortex-a8) feature set. 
Otherwise, return @@ -136,7 +145,7 @@ static MCInstrInfo *createARMMCInstrInfo() { static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { MCRegisterInfo *X = new MCRegisterInfo(); - InitARMMCRegisterInfo(X, ARM::LR); + InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC); return X; } @@ -178,7 +187,8 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, llvm_unreachable("ARM does not support Windows COFF format"); } - return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack); + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, + TheTriple.getArch() == Triple::thumb); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 510302d..a89981e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -46,7 +46,7 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT); +MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU); /// createARMELFObjectWriter - Construct an ELF Mach-O object writer. MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 95640f7..b9efe74 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -7,17 +7,18 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachOSymbolFlags.h" +#include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" @@ -41,6 +42,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue); + bool requiresExternRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCFragment &Fragment, + unsigned RelocType, const MCSymbolData *SD, + uint64_t FixedValue); + public: ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) @@ -305,6 +312,46 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, Writer->addRelocation(Fragment->getParent(), MRE); } +bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCFragment &Fragment, + unsigned RelocType, + const MCSymbolData *SD, + uint64_t FixedValue) { + // Most cases can be identified purely from the symbol. + if (Writer->doesSymbolRequireExternRelocation(SD)) + return true; + int64_t Value = (int64_t)FixedValue; // The displacement is signed. + int64_t Range; + switch (RelocType) { + default: + return false; + case macho::RIT_ARM_Branch24Bit: + // PC pre-adjustment of 8 for these instructions. + Value -= 8; + // ARM BL/BLX has a 25-bit offset. 
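// In other words: after the PC adjustment, a signed 25-bit displacement
// reaches [-(0x1ffffff + 1), 0x1ffffff]. The comparison after this switch
// checks the section-relative Value against exactly that window; e.g. a
// Value of 0x2000000 (32 MiB) falls outside it and forces an external
// relocation so the linker can insert a branch island.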
+ Range = 0x1ffffff; + break; + case macho::RIT_ARM_ThumbBranch22Bit: + // PC pre-adjustment of 4 for these instructions. + Value -= 4; + // Thumb BL/BLX has a 24-bit offset. + Range = 0xffffff; + } + // BL/BLX also use external relocations when an internal relocation + // would result in the target being out of range. This gives the linker + // enough information to generate a branch island. + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Value += Writer->getSectionAddress(&SymSD); + Value -= Writer->getSectionAddress(Fragment.getParent()); + // If the resultant value would be out of range for an internal relocation, + // use an external instead. + if (Value > Range || Value < -(Range + 1)) + return true; + return false; +} + void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, @@ -373,7 +420,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // Check whether we need an external or internal relocation. - if (Writer->doesSymbolRequireExternRelocation(SD)) { + if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD, + FixedValue)) { IsExtern = 1; Index = SD->getIndex(); diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index 2565994..e17eb4d 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMARMDesc ARMAsmBackend.cpp ARMELFObjectWriter.cpp + ARMELFStreamer.cpp ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index ad60e32..2e266c2 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -16,16 +16,16 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; static cl::opt<bool> @@ -51,7 +51,8 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; - bool isA9; + bool isLikeA9; + bool isSwift; unsigned MIIdx; MachineInstr* LastMIs[4]; SmallPtrSet<MachineInstr*, 4> IgnoreStall; @@ -60,6 +61,7 @@ namespace { void pushStack(MachineInstr *MI); MachineInstr *getAccDefMI(MachineInstr *MI) const; unsigned getDefReg(MachineInstr *MI) const; + bool hasLoopHazard(MachineInstr *MI) const; bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; bool FindMLxHazard(MachineInstr *MI); void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, @@ -135,6 +137,50 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { return Reg; } +/// hasLoopHazard - Check whether an MLx instruction is chained to itself across +/// a single-MBB loop. 
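Editor's sketch, not part of the patch: before the implementation, a plain C++ illustration (the function is invented for this note) of the loop shape hasLoopHazard detects -- an accumulator that reaches the next iteration's multiply-accumulate only through the current iteration's result, so successive VMLAs serialize on Swift's out-of-order core.

// The accumulator becomes a PHI in the single loop block; the VMLA selected
// for Acc += A[I] * B[I] is chained to itself across iterations.
static float dotProduct(const float *A, const float *B, int N) {
  float Acc = 0.0f;
  for (int I = 0; I < N; ++I)
    Acc += A[I] * B[I];   // fused multiply-accumulate, loop-carried
  return Acc;             // each VMLA waits on the previous iteration's result
}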
+bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const { + unsigned Reg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return false; + + MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *DefMI = MRI->getVRegDef(Reg); + while (true) { +outer_continue: + if (DefMI->getParent() != MBB) + break; + + if (DefMI->isPHI()) { + for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { + if (DefMI->getOperand(i + 1).getMBB() == MBB) { + unsigned SrcReg = DefMI->getOperand(i).getReg(); + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DefMI = MRI->getVRegDef(SrcReg); + goto outer_continue; + } + } + } + } else if (DefMI->isCopyLike()) { + Reg = DefMI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DefMI = MRI->getVRegDef(Reg); + continue; + } + } else if (DefMI->isInsertSubreg()) { + Reg = DefMI->getOperand(2).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DefMI = MRI->getVRegDef(Reg); + continue; + } + } + + break; + } + + return DefMI == MI; +} + bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { // FIXME: Detect integer instructions properly. const MCInstrDesc &MCID = MI->getDesc(); @@ -149,6 +195,19 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { return false; } +static bool isFpMulInstruction(unsigned Opcode) { + switch (Opcode) { + case ARM::VMULS: + case ARM::VMULfd: + case ARM::VMULfq: + case ARM::VMULD: + case ARM::VMULslfd: + case ARM::VMULslfq: + return true; + default: + return false; + } +} bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { if (NumExpand >= ExpandLimit) @@ -171,6 +230,12 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { return true; } + // On Swift, we mostly care about hazards from multiplication instructions + // writing the accumulator and the pipelining of loop iterations by out-of- + // order execution. + if (isSwift) + return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI); + if (IgnoreStall.count(MI)) return false; @@ -179,8 +244,8 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { // preserves the in-order retirement of the instructions. // Look at the next few instructions, if *most* of them can cause hazards, // then the scheduler can't *fix* this, we'd better break up the VMLA. - unsigned Limit1 = isA9 ? 1 : 4; - unsigned Limit2 = isA9 ? 1 : 4; + unsigned Limit1 = isLikeA9 ? 1 : 4; + unsigned Limit2 = isLikeA9 ? 
1 : 4; for (unsigned i = 1; i <= 4; ++i) { int Idx = ((int)MIIdx - i + 4) % 4; MachineInstr *NextMI = LastMIs[Idx]; @@ -316,7 +381,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TRI = Fn.getTarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); - isA9 = STI->isCortexA9(); + isLikeA9 = STI->isLikeA9() || STI->isSwift(); + isSwift = STI->isSwift(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 3e48ed1..f069535 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -16,7 +16,7 @@ BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ - ARMGenEDInfo.inc ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ + ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp index 500e3de..fa5681f 100644 --- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp +++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index edd73c2..123ada6 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -281,7 +281,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); AddDefaultPred(MIB); - MIB->copyImplicitOps(&*MBBI); + MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); } @@ -352,7 +352,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - MIB->copyImplicitOps(&*MI); + MIB.copyImplicitOps(&*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 735b255..095736d 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -15,8 +15,8 @@ #include "ARM.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index a39b722..57cc7d8 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -18,21 +18,21 @@ #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include 
"llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { extern cl::opt<bool> ReuseFrameIndexVals; @@ -390,6 +390,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); DebugLoc dl = MI.getDebugLoc(); + MachineInstrBuilder MIB(*MBB.getParent(), &MI); unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); @@ -417,7 +418,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset MI.RemoveOperand(FrameRegIdx+1); - MachineInstrBuilder MIB(&MI); return true; } @@ -428,7 +428,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, if (Opcode == ARM::tADDi3) { MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); - MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) .addImm(Offset / Scale)); } else { @@ -457,7 +456,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, if (Opcode == ARM::tADDi3) { MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); - MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); } else { MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); @@ -603,6 +601,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); + MachineInstrBuilder MIB(*MBB.getParent(), &MI); while (!MI.getOperand(i).isFI()) { ++i; @@ -719,8 +718,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Add predicate back if it's needed. 
- if (MI.isPredicable()) { - MachineInstrBuilder MIB(&MI); + if (MI.isPredicable()) AddDefaultPred(MIB); - } } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index d54aa93..97c254c 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -11,12 +11,12 @@ #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumITs, "Number of IT blocks inserted"); diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index e9e20dd..67e8ec7 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -51,7 +51,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, MachineBasicBlock *MBB = Tail->getParent(); ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); if (!AFI->hasITBlocks()) { - TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest); return; } @@ -65,7 +65,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, --MBBI; // Actually replace the tail. - TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest); // Fix up IT. if (CC != ARMCC::AL) { @@ -408,7 +408,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Remove offset and remaining explicit predicate operands. 
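Editor's sketch, not part of the patch: the MachineInstrBuilder edits in the surrounding hunks all follow one pattern -- instead of re-wrapping the instruction at each use with the old MachineInstrBuilder(&MI) constructor, a single builder is created up front from a (MachineFunction, MachineInstr) pair and reused for every operand append. A minimal illustration, with a hypothetical helper name:

#include "ARMBaseInstrInfo.h"                  // AddDefaultPred (ARM target code)
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

// Wrap the existing MI once; all later appends go through the one builder.
static void addPredicateOperands(llvm::MachineInstr &MI) {
  llvm::MachineFunction &MF = *MI.getParent()->getParent();
  llvm::MachineInstrBuilder MIB(MF, &MI);      // two-argument form used above
  if (MI.isPredicable())
    llvm::AddDefaultPred(MIB);                 // appends pred imm + CC register
}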
do MI.RemoveOperand(FrameRegIdx+1); while (MI.getNumOperands() > FrameRegIdx+1); - MachineInstrBuilder MIB(&MI); + MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); AddDefaultPred(MIB); return true; } diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 29a87d0..1a7a4d4 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -16,12 +16,12 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" using namespace llvm; Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index f18f491..567bc05 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -9,19 +9,20 @@ #define DEBUG_TYPE "t2-reduce-size" #include "ARM.h" -#include "ARMBaseRegisterInfo.h" #include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMSubtarget.h" -#include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb2InstrInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/Function.h" // To access Function attributes #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); @@ -52,81 +53,82 @@ namespace { unsigned PredCC2 : 2; unsigned PartFlag : 1; // 16-bit instruction does partial flag update unsigned Special : 1; // Needs to be dealt with specially + unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift) }; static const ReduceEntry ReduceTable[] = { - // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S - { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 }, - { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 }, - { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 }, - { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 }, - { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 }, - { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, - { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 }, - //FIXME: Disable CMN, as CCodes are backwards from compare expectations - //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 }, - { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 }, - // FIXME: adr.n immediate offset must be multiple of 4. 
- //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 }, - { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, - { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 }, - // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less - // likely to cause issue in the loop. As a size / performance workaround, - // they are not marked as such. - { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 }, - { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 }, - // FIXME: Do we need the 16-bit 'S' variant? - { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 }, - { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 }, - { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 }, - { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 }, - { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 }, - { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 }, - { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 }, - { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, - { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, - { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, - { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, - - // FIXME: Clean this up after splitting each Thumb load / store opcode - // into multiple ones. 
- { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 }, - { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - - { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 }, - { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 }, - { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 }, - // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent - { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 }, - { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 }, + // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM + { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 }, + { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }, + { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 }, + { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 }, + { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, + { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 }, + //FIXME: Disable CMN, as CCodes are backwards from compare expectations + //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 }, + { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 }, + // FIXME: adr.n immediate offset must be multiple of 4. + //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less + // likely to cause issue in the loop. As a size / performance workaround, + // they are not marked as such. + { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0,0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1,0 }, + // FIXME: Do we need the 16-bit 'S' variant? 
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 }, + { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, + { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, + { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 }, + { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 }, + { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, + { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 }, + { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + + // FIXME: Clean this up after splitting each Thumb load / store opcode + // into multiple ones. + { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + + { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 }, + // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent + { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 } }; class Thumb2SizeReduce : public MachineFunctionPass { @@ -175,13 +177,22 @@ namespace { bool LiveCPSR, MachineInstr *CPSRDef, bool IsSelfLoop); + /// ReduceMI - Attempt to reduce MI, return true on success. + bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop); + /// ReduceMBB - Reduce width of instructions in the specified basic block. 
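Editor's note, not part of the patch: the widened ReduceTable rows above pack twelve positional fields, so one row is worth decoding. The struct below mirrors the ReduceEntry layout shown earlier in this file's diff (types simplified), annotated with the values of the t2ADDri row { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }.

struct ReduceEntrySketch {
  unsigned WideOpc;        // ARM::t2ADDri - 32-bit opcode to narrow
  unsigned NarrowOpc1;     // ARM::tADDi3  - 16-bit form
  unsigned NarrowOpc2;     // ARM::tADDi8  - 16-bit two-address form
  unsigned Imm1Limit;      // 3 - immediate width in bits for NarrowOpc1
  unsigned Imm2Limit;      // 8 - immediate width in bits for NarrowOpc2
  unsigned LowRegs1 : 1;   // 1 - NarrowOpc1 requires low registers
  unsigned LowRegs2 : 1;   // 1 - NarrowOpc2 requires low registers
  unsigned PredCC1  : 2;   // 0 - flag setting tied to predication
  unsigned PredCC2  : 2;   // 0 - likewise for the two-address form
  unsigned PartFlag : 1;   // 0 - no partial CPSR update in the 16-bit form
  unsigned Special  : 1;   // 1 - routed through ReduceSpecial
  unsigned AvoidMovs : 1;  // 0 - new column for Swift's shifter-operand rule
};

The rows that set AvoidMovs are exactly the shift entries (t2LSLri, t2LSRrr, t2ASRri, ...); ReduceTo2Addr and ReduceToNarrow skip those narrowings when STI->avoidMOVsShifterOperand() holds and the function is not marked for size.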
bool ReduceMBB(MachineBasicBlock &MBB); + + bool OptimizeSize; + bool MinimizeSize; }; char Thumb2SizeReduce::ID = 0; } Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { + OptimizeSize = MinimizeSize = false; for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { unsigned FromOpc = ReduceTable[i].WideOpc; if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) @@ -216,8 +227,8 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { bool Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use, bool FirstInSelfLoop) { - // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder). - if (!STI->avoidCPSRPartialUpdate()) + // Disable the check for -Oz (aka OptimizeForSizeHarder). + if (MinimizeSize || !STI->avoidCPSRPartialUpdate()) return false; if (!Def) @@ -578,7 +589,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, // are prioritized, but the table assumes a unique entry for each // source insn opcode. So for now, we hack a local entry record to use. static const ReduceEntry NarrowEntry = - { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 }; + { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 }; if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); @@ -596,6 +607,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; + if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && + STI->avoidMOVsShifterOperand()) + // Don't issue movs with shifter operand for some CPUs unless we + // are optimizing / minimizing for size. + return false; + unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); // t2MUL is "special". The tied source operand is second, not first. @@ -708,6 +725,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; + if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && + STI->avoidMOVsShifterOperand()) + // Don't issue movs with shifter operand for some CPUs unless we + // are optimizing / minimizing for size. + return false; + unsigned Limit = ~0U; if (Entry.Imm1Limit) Limit = (1 << Entry.Imm1Limit) - 1; @@ -841,6 +864,32 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { return LiveCPSR; } +bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); + if (OPI == ReduceOpcodeMap.end()) + return false; + const ReduceEntry &Entry = ReduceTable[OPI->second]; + + // Don't attempt normal reductions on "special" cases for now. + if (Entry.Special) + return ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); + + // Try to transform to a 16-bit two-address instruction. + if (Entry.NarrowOpc2 && + ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) + return true; + + // Try to transform to a 16-bit non-two-address instruction. 
+ if (Entry.NarrowOpc1 && + ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) + return true; + + return false; +} + bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -865,40 +914,20 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); - unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); - if (OPI != ReduceOpcodeMap.end()) { - const ReduceEntry &Entry = ReduceTable[OPI->second]; - // Ignore "special" cases for now. - if (Entry.Special) { - if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { - Modified = true; - MachineBasicBlock::instr_iterator I = prior(NextMII); - MI = &*I; - } - goto ProcessNext; - } - - // Try to transform to a 16-bit two-address instruction. - if (Entry.NarrowOpc2 && - ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { - Modified = true; - MachineBasicBlock::instr_iterator I = prior(NextMII); - MI = &*I; - goto ProcessNext; - } - - // Try to transform to a 16-bit non-two-address instruction. - if (Entry.NarrowOpc1 && - ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { - Modified = true; - MachineBasicBlock::instr_iterator I = prior(NextMII); - MI = &*I; - } + // Does NextMII belong to the same bundle as MI? + bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred(); + + if (ReduceMI(MBB, MI, LiveCPSR, CPSRDef, IsSelfLoop)) { + Modified = true; + MachineBasicBlock::instr_iterator I = prior(NextMII); + MI = &*I; + // Removing and reinserting the first instruction in a bundle will break + // up the bundle. Fix the bundling if it was broken. + if (NextInSameBundle && !NextMII->isBundledWithPred()) + NextMII->bundleWithPred(); } - ProcessNext: - if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) { + if (!NextInSameBundle && MI->isInsideBundle()) { // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill // marker is only on the BUNDLE instruction. Process the BUNDLE // instruction as we finish with the bundled instruction to work around @@ -931,6 +960,13 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); STI = &TM.getSubtarget<ARMSubtarget>(); + // Optimizing / minimizing size? + AttributeSet FnAttrs = MF.getFunction()->getAttributes(); + OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); + MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize); + bool Modified = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) Modified |= ReduceMBB(*I);
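Editor's sketch, not part of the patch: the closing hunk swaps a global size heuristic for per-function IR attributes. Isolating the query into an invented helper (the AttributeSet calls are the same ones the hunk itself makes):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// -Os marks a function OptimizeForSize; -Oz additionally marks it MinSize.
static void readSizePolicy(const llvm::Function &F, bool &OptimizeSize,
                           bool &MinimizeSize) {
  llvm::AttributeSet Attrs = F.getAttributes();
  OptimizeSize = Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
                                    llvm::Attribute::OptimizeForSize);
  MinimizeSize = Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
                                    llvm::Attribute::MinSize);
}

Either bit re-enables the movs-with-shifter narrowings that the AvoidMovs entries suppress on Swift, and MinimizeSize alone also disables the CPSR partial-update workaround in canAddPseudoFlagDep.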
