Diffstat (limited to 'lib/Target/ARM')
80 files changed, 6355 insertions, 2118 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 16d0da3..acb57f7 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -1,4 +1,4 @@ -//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===// +//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 86aeeb2..b05fe62 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -1,4 +1,4 @@ -//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===// +//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -32,9 +32,15 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", "Enable VFP3 instructions", [FeatureVFP2]>; +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; +def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true", + "Enable Advanced SIMD2 instructions", + [FeatureNEON]>; def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", @@ -70,6 +76,8 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", "true", "Use NEON for single precision FP">; +// Allow more precision in FP computation +def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; // Disable 32-bit to 16-bit narrowing for experimentation. def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", @@ -83,6 +91,11 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; +// Some processors perform return stack prediction. CodeGen should avoid issue +// "normal" call instructions to callees which do not return. +def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Has return address stack">; + /// Some M architectures don't have the DSP extension (v7E-M vs. v7M) def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true", "Supports v7 DSP instructions in Thumb2">; @@ -198,13 +211,14 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, // V7a Processors. def : Processor<"cortex-a8", CortexA8Itineraries, [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2]>; + FeatureDSPThumb2, FeatureHasRAS]>; def : Processor<"cortex-a9", CortexA9Itineraries, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2]>; + FeatureDSPThumb2, FeatureHasRAS]>; def : Processor<"cortex-a9-mp", CortexA9Itineraries, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureMP]>; + FeatureDSPThumb2, FeatureMP, + FeatureHasRAS]>; // V7M Processors. 
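For reference, the new entries follow the usual SubtargetFeature pattern: TableGen turns the HasVFPv4/HasNEON2 fields named above into boolean members of ARMSubtarget with matching accessors. A minimal sketch of how codegen code typically consumes such a bit is shown below; the helper canUseFusedMAC and its placement are assumptions for illustration only, not code from this patch.

// Illustrative sketch only: canUseFusedMAC is a hypothetical helper.
// hasVFP4()/hasNEON2() are the accessors generated for the HasVFPv4 and
// HasNEON2 fields declared in the SubtargetFeature entries above.
static bool canUseFusedMAC(const ARMSubtarget &Subtarget) {
  // Scalar fused multiply-accumulate (VFMA/VFMS) is a VFPv4 facility; a
  // vector-form check would instead test Subtarget.hasNEON2().
  return Subtarget.hasVFP4();
}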
def : ProcNoItin<"cortex-m3", [HasV7Ops, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 6ae287a..4ec19cc 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -84,6 +84,7 @@ namespace { void EmitTextAttribute(unsigned Attribute, StringRef String) { switch (Attribute) { + default: llvm_unreachable("Unsupported Text attribute in ASM Mode"); case ARMBuildAttrs::CPU_name: Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower()); break; @@ -92,7 +93,6 @@ namespace { case ARMBuildAttrs::VFP_arch: Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower()); break; - default: assert(0 && "Unsupported Text attribute in ASM Mode"); break; } } void Finish() { } @@ -196,6 +196,7 @@ namespace { AttributeItemType item = Contents[i]; Streamer.EmitULEB128IntValue(item.Tag, 0); switch (item.Type) { + default: llvm_unreachable("Invalid attribute type"); case AttributeItemType::NumericAttribute: Streamer.EmitULEB128IntValue(item.IntValue, 0); break; @@ -203,8 +204,6 @@ namespace { Streamer.EmitBytes(item.StringValue.upper(), 0); Streamer.EmitIntValue(0, 1); // '\0' break; - default: - assert(0 && "Invalid attribute type"); } } @@ -299,6 +298,22 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitLabel(CurrentFnSym); } +void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { + uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + assert(Size && "C++ constructor pointer had zero size!"); + + const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); + assert(GV && "C++ constructor pointer was not a GlobalValue!"); + + const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV), + (Subtarget->isTargetDarwin() + ? MCSymbolRefExpr::VK_None + : MCSymbolRefExpr::VK_ARM_TARGET1), + OutContext); + + OutStreamer.EmitValue(E, Size); +} + /// runOnMachineFunction - This uses the EmitInstruction() /// method to print assembly for each instruction. 
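A change repeated throughout this commit is the replacement of assert(0 && "...") with llvm_unreachable("..."). llvm_unreachable marks the point as unreachable even in release builds, so fully covered switches no longer need a dead break, a default fall-through, or a dummy return value after them. A minimal self-contained sketch of the pattern, using a made-up enum purely for illustration:

#include "llvm/Support/ErrorHandling.h"

enum Kind { KindA, KindB };  // hypothetical enum, not from this patch

static const char *kindName(Kind K) {
  switch (K) {
  case KindA: return "A";
  case KindB: return "B";
  }
  // No dead "return 0;" is required after the switch: llvm_unreachable
  // aborts in asserts builds and is an optimizer hint in release builds.
  llvm_unreachable("Invalid Kind!");
}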
/// @@ -315,8 +330,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned TF = MO.getTargetFlags(); switch (MO.getType()) { - default: - assert(0 && "<unknown operand type>"); + default: llvm_unreachable("<unknown operand type>"); case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); @@ -585,10 +599,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified); // Emit ARM Build Attributes - if (Subtarget->isTargetELF()) { - + if (Subtarget->isTargetELF()) emitAttributes(); - } } @@ -719,15 +731,25 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of - * neon/vfpv3/vfpv2 for .fpu parameters */ - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); + * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ + if (Subtarget->hasNEON2()) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4"); + else + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); /* If emitted for NEON, omit from VFP below, since you can have both * NEON and VFP in build attributes but only one .fpu */ emitFPU = false; } + /* VFPv4 + .fpu */ + if (Subtarget->hasVFP4()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv4A); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4"); + /* VFPv3 + .fpu */ - if (Subtarget->hasVFP3()) { + } else if (Subtarget->hasVFP3()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, ARMBuildAttrs::AllowFPv3A); if (emitFPU) @@ -817,7 +839,6 @@ static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber, static MCSymbolRefExpr::VariantKind getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { switch (Modifier) { - default: llvm_unreachable("Unknown modifier!"); case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None; case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD; case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF; @@ -825,7 +846,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT; case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF; } - return MCSymbolRefExpr::VK_None; + llvm_unreachable("Invalid ARMCPModifier!"); } MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) { @@ -1093,7 +1114,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { switch (Opc) { default: MI->dump(); - assert(0 && "Unsupported opcode for unwinding information"); + llvm_unreachable("Unsupported opcode for unwinding information"); case ARM::tPUSH: // Special case here: no src & dst reg, but two extra imp ops. 
StartOp = 2; NumOffset = 2; @@ -1108,6 +1129,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { break; case ARM::STR_PRE_IMM: case ARM::STR_PRE_REG: + case ARM::t2STR_PRE: assert(MI->getOperand(2).getReg() == ARM::SP && "Only stack pointer as a source reg is supported"); RegList.push_back(SrcReg); @@ -1121,14 +1143,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { switch (Opc) { default: MI->dump(); - assert(0 && "Unsupported opcode for unwinding information"); + llvm_unreachable("Unsupported opcode for unwinding information"); case ARM::MOVr: + case ARM::tMOVr: Offset = 0; break; case ARM::ADDri: Offset = -MI->getOperand(2).getImm(); break; case ARM::SUBri: + case ARM::t2SUBri: Offset = MI->getOperand(2).getImm(); break; case ARM::tSUBspi: @@ -1166,16 +1190,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { OutStreamer.EmitPad(Offset); } else { MI->dump(); - assert(0 && "Unsupported opcode for unwinding information"); + llvm_unreachable("Unsupported opcode for unwinding information"); } } else if (DstReg == ARM::SP) { // FIXME: .movsp goes here MI->dump(); - assert(0 && "Unsupported opcode for unwinding information"); + llvm_unreachable("Unsupported opcode for unwinding information"); } else { MI->dump(); - assert(0 && "Unsupported opcode for unwinding information"); + llvm_unreachable("Unsupported opcode for unwinding information"); } } } @@ -1204,7 +1228,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Check for manual lowerings. unsigned Opc = MI->getOpcode(); switch (Opc) { - case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass"); + case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass"); case ARM::DBG_VALUE: { if (isVerbose() && OutStreamer.hasRawTextSupport()) { SmallString<128> TmpStr; @@ -1319,6 +1343,60 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } + case ARM::BMOVPCBr9_CALL: + case ARM::BMOVPCB_CALL: { + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::Bcc); + const GlobalValue *GV = MI->getOperand(0).getGlobal(); + MCSymbol *GVSym = Mang->getSymbol(GV); + const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr)); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } + case ARM::t2BMOVPCBr9_CALL: + case ARM::t2BMOVPCB_CALL: { + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. 
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::t2B); + const GlobalValue *GV = MI->getOperand(0).getGlobal(); + MCSymbol *GVSym = Mang->getSymbol(GV); + const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr)); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } case ARM::MOVi16_ga_pcrel: case ARM::t2MOVi16_ga_pcrel: { MCInst TmpInst; diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 7741fc4..4b276c5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -1,4 +1,4 @@ -//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===// +//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -73,6 +73,7 @@ public: virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); + void EmitXXStructor(const Constant *CV); // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8bf5475..75b796e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===// +//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -156,9 +156,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned OffImm = MI->getOperand(NumOps-2).getImm(); ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); switch (AddrMode) { - default: - assert(false && "Unknown indexed op!"); - return NULL; + default: llvm_unreachable("Unknown indexed op!"); case ARMII::AddrMode2: { bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; unsigned Amt = ARM_AM::getAM2Offset(OffImm); @@ -505,15 +503,11 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { - // FIXME: This confuses implicit_def with optional CPSR def. - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.getImplicitDefs() && !MI->hasOptionalDef()) - return false; - bool Found = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == ARM::CPSR) { + if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || + (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { Pred.push_back(MO); Found = true; } @@ -558,85 +552,84 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { if (MCID.getSize()) return MCID.getSize(); - // If this machine instr is an inline asm, measure it. 
- if (MI->getOpcode() == ARM::INLINEASM) - return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); - if (MI->isLabel()) - return 0; - unsigned Opc = MI->getOpcode(); - switch (Opc) { - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::PROLOG_LABEL: - case TargetOpcode::EH_LABEL: - case TargetOpcode::DBG_VALUE: - return 0; - case TargetOpcode::BUNDLE: - return getInstBundleLength(MI); - case ARM::MOVi16_ga_pcrel: - case ARM::MOVTi16_ga_pcrel: - case ARM::t2MOVi16_ga_pcrel: - case ARM::t2MOVTi16_ga_pcrel: - return 4; - case ARM::MOVi32imm: - case ARM::t2MOVi32imm: - return 8; - case ARM::CONSTPOOL_ENTRY: - // If this machine instr is a constant pool entry, its size is recorded as - // operand #2. - return MI->getOperand(2).getImm(); - case ARM::Int_eh_sjlj_longjmp: - return 16; - case ARM::tInt_eh_sjlj_longjmp: - return 10; - case ARM::Int_eh_sjlj_setjmp: - case ARM::Int_eh_sjlj_setjmp_nofp: - return 20; - case ARM::tInt_eh_sjlj_setjmp: - case ARM::t2Int_eh_sjlj_setjmp: - case ARM::t2Int_eh_sjlj_setjmp_nofp: - return 12; - case ARM::BR_JTr: - case ARM::BR_JTm: - case ARM::BR_JTadd: - case ARM::tBR_JTr: - case ARM::t2BR_JT: - case ARM::t2TBB_JT: - case ARM::t2TBH_JT: { - // These are jumptable branches, i.e. a branch followed by an inlined - // jumptable. The size is 4 + 4 * number of entries. For TBB, each - // entry is one byte; TBH two byte each. - unsigned EntrySize = (Opc == ARM::t2TBB_JT) - ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); - unsigned NumOps = MCID.getNumOperands(); - MachineOperand JTOP = - MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2)); - unsigned JTI = JTOP.getIndex(); - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - assert(MJTI != 0); - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - assert(JTI < JT.size()); - // Thumb instructions are 2 byte aligned, but JT entries are 4 byte - // 4 aligned. The assembler / linker may add 2 byte padding just before - // the JT entries. The size does not include this padding; the - // constant islands pass does separate bookkeeping for it. - // FIXME: If we know the size of the function is less than (1 << 16) *2 - // bytes, we can use 16-bit entries instead. Then there won't be an - // alignment issue. - unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; - unsigned NumEntries = getNumJTEntries(JT, JTI); - if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) - // Make sure the instruction that follows TBB is 2-byte aligned. - // FIXME: Constant island pass should insert an "ALIGN" instruction - // instead. - ++NumEntries; - return NumEntries * EntrySize + InstSize; - } - default: - // Otherwise, pseudo-instruction sizes are zero. - return 0; - } - return 0; // Not reached + // If this machine instr is an inline asm, measure it. 
+ if (MI->getOpcode() == ARM::INLINEASM) + return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); + if (MI->isLabel()) + return 0; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: + return 0; + case TargetOpcode::BUNDLE: + return getInstBundleLength(MI); + case ARM::MOVi16_ga_pcrel: + case ARM::MOVTi16_ga_pcrel: + case ARM::t2MOVi16_ga_pcrel: + case ARM::t2MOVTi16_ga_pcrel: + return 4; + case ARM::MOVi32imm: + case ARM::t2MOVi32imm: + return 8; + case ARM::CONSTPOOL_ENTRY: + // If this machine instr is a constant pool entry, its size is recorded as + // operand #2. + return MI->getOperand(2).getImm(); + case ARM::Int_eh_sjlj_longjmp: + return 16; + case ARM::tInt_eh_sjlj_longjmp: + return 10; + case ARM::Int_eh_sjlj_setjmp: + case ARM::Int_eh_sjlj_setjmp_nofp: + return 20; + case ARM::tInt_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp_nofp: + return 12; + case ARM::BR_JTr: + case ARM::BR_JTm: + case ARM::BR_JTadd: + case ARM::tBR_JTr: + case ARM::t2BR_JT: + case ARM::t2TBB_JT: + case ARM::t2TBH_JT: { + // These are jumptable branches, i.e. a branch followed by an inlined + // jumptable. The size is 4 + 4 * number of entries. For TBB, each + // entry is one byte; TBH two byte each. + unsigned EntrySize = (Opc == ARM::t2TBB_JT) + ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); + unsigned NumOps = MCID.getNumOperands(); + MachineOperand JTOP = + MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2)); + unsigned JTI = JTOP.getIndex(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + assert(MJTI != 0); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + assert(JTI < JT.size()); + // Thumb instructions are 2 byte aligned, but JT entries are 4 byte + // 4 aligned. The assembler / linker may add 2 byte padding just before + // the JT entries. The size does not include this padding; the + // constant islands pass does separate bookkeeping for it. + // FIXME: If we know the size of the function is less than (1 << 16) *2 + // bytes, we can use 16-bit entries instead. Then there won't be an + // alignment issue. + unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; + unsigned NumEntries = getNumJTEntries(JT, JTI); + if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) + // Make sure the instruction that follows TBB is 2-byte aligned. + // FIXME: Constant island pass should insert an "ALIGN" instruction + // instead. + ++NumEntries; + return NumEntries * EntrySize + InstSize; + } + default: + // Otherwise, pseudo-instruction sizes are zero. + return 0; + } } unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { @@ -765,8 +758,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 16: if (ARM::QPRRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) + // Use aligned spills if the stack can be realigned. 
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); @@ -851,7 +845,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::VST1q64Pseudo: + case ARM::VST1q64: if (MI->getOperand(0).isFI() && MI->getOperand(2).getSubReg() == 0) { FrameIndex = MI->getOperand(0).getIndex(); @@ -914,8 +908,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 16: if (ARM::QPRRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); } else { @@ -937,11 +931,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) .addFrameIndex(FI)) .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - MIB.addReg(DestReg, RegState::Define | RegState::Implicit); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); } } else llvm_unreachable("Unknown reg class!"); @@ -952,15 +945,14 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) .addFrameIndex(FI)) .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); - MIB.addReg(DestReg, RegState::Define | RegState::Implicit); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); } else llvm_unreachable("Unknown reg class!"); break; @@ -997,7 +989,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::VLD1q64Pseudo: + case ARM::VLD1q64: if (MI->getOperand(1).isFI() && MI->getOperand(0).getSubReg() == 0) { FrameIndex = 
MI->getOperand(1).getIndex(); @@ -1406,7 +1398,10 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // saves compile time, because it doesn't require every single // stack slot reference to depend on the instruction that does the // modification. - if (MI->definesRegister(ARM::SP)) + // Calls don't actually change the stack pointer, even if they have imp-defs. + // No ARM calling conventions change the stack pointer. (X86 calling + // conventions sometimes do). + if (!MI->isCall() && MI->definesRegister(ARM::SP)) return true; return false; @@ -1471,13 +1466,12 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { int llvm::getMatchingCondBranchOpcode(int Opc) { if (Opc == ARM::B) return ARM::Bcc; - else if (Opc == ARM::tB) + if (Opc == ARM::tB) return ARM::tBcc; - else if (Opc == ARM::t2B) - return ARM::t2Bcc; + if (Opc == ARM::t2B) + return ARM::t2Bcc; llvm_unreachable("Unknown unconditional branch opcode!"); - return 0; } @@ -1650,7 +1644,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, } default: llvm_unreachable("Unsupported addressing mode!"); - break; } Offset += InstrOffs * Scale; @@ -1801,6 +1794,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { const MachineOperand &MO = Instr.getOperand(IO); + if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) + return false; if (!MO.isReg()) continue; // This instruction modifies or uses CPSR after the one we want to @@ -1862,6 +1857,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, for (unsigned IO = 0, EO = Instr.getNumOperands(); !isSafe && IO != EO; ++IO) { const MachineOperand &MO = Instr.getOperand(IO); + if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) { + isSafe = true; + break; + } if (!MO.isReg() || MO.getReg() != ARM::CPSR) continue; if (MO.isDef()) { @@ -2012,7 +2011,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, switch (Opc) { default: llvm_unreachable("Unexpected multi-uops instruction!"); - break; case ARM::VLDMQIA: case ARM::VSTMQIA: return 2; @@ -2583,9 +2581,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2DUPd8: case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: - case ARM::VLD2DUPd8_UPD: - case ARM::VLD2DUPd16_UPD: - case ARM::VLD2DUPd32_UPD: + case ARM::VLD2DUPd8wb_fixed: + case ARM::VLD2DUPd16wb_fixed: + case ARM::VLD2DUPd32wb_fixed: + case ARM::VLD2DUPd8wb_register: + case ARM::VLD2DUPd16wb_register: + case ARM::VLD2DUPd32wb_register: case ARM::VLD4DUPd8: case ARM::VLD4DUPd16: case ARM::VLD4DUPd32: @@ -2693,33 +2694,33 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (DefAlign < 8 && Subtarget.isCortexA9()) switch (DefMCID.getOpcode()) { default: break; - case ARM::VLD1q8Pseudo: - case ARM::VLD1q16Pseudo: - case ARM::VLD1q32Pseudo: - case ARM::VLD1q64Pseudo: - case ARM::VLD1q8PseudoWB_register: - case ARM::VLD1q16PseudoWB_register: - case ARM::VLD1q32PseudoWB_register: - case ARM::VLD1q64PseudoWB_register: - case ARM::VLD1q8PseudoWB_fixed: - case ARM::VLD1q16PseudoWB_fixed: - case ARM::VLD1q32PseudoWB_fixed: - case ARM::VLD1q64PseudoWB_fixed: - case ARM::VLD2d8Pseudo: - case ARM::VLD2d16Pseudo: - case ARM::VLD2d32Pseudo: + case ARM::VLD1q8: + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8wb_register: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_register: + case 
ARM::VLD1q8wb_fixed: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q64wb_fixed: + case ARM::VLD2d8: + case ARM::VLD2d16: + case ARM::VLD2d32: case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8PseudoWB_fixed: - case ARM::VLD2d16PseudoWB_fixed: - case ARM::VLD2d32PseudoWB_fixed: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: case ARM::VLD2q8PseudoWB_fixed: case ARM::VLD2q16PseudoWB_fixed: case ARM::VLD2q32PseudoWB_fixed: - case ARM::VLD2d8PseudoWB_register: - case ARM::VLD2d16PseudoWB_register: - case ARM::VLD2d32PseudoWB_register: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: case ARM::VLD2q8PseudoWB_register: case ARM::VLD2q16PseudoWB_register: case ARM::VLD2q32PseudoWB_register: @@ -2767,9 +2768,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8Pseudo_UPD: - case ARM::VLD2DUPd16Pseudo_UPD: - case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD2DUPd8PseudoWB_fixed: + case ARM::VLD2DUPd16PseudoWB_fixed: + case ARM::VLD2DUPd32PseudoWB_fixed: + case ARM::VLD2DUPd8PseudoWB_register: + case ARM::VLD2DUPd16PseudoWB_register: + case ARM::VLD2DUPd32PseudoWB_register: case ARM::VLD4DUPd8Pseudo: case ARM::VLD4DUPd16Pseudo: case ARM::VLD4DUPd32Pseudo: @@ -2848,7 +2852,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MCInstrDesc &MCID = MI->getDesc(); unsigned Class = MCID.getSchedClass(); unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) + if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. *PredCost = 1; @@ -2997,3 +3001,7 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // This will go before any implicit ops. AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); } + +bool ARMBaseInstrInfo::hasNOP() const { + return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 68e8208..314e317 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -1,4 +1,4 @@ -//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===// +//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -35,6 +35,9 @@ protected: explicit ARMBaseInstrInfo(const ARMSubtarget &STI); public: + // Return whether the target has an explicit NOP encoding. + bool hasNOP() const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. 
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 8ee6ce2..d2aff9a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===// +//===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -61,28 +61,14 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, BasePtr(ARM::R6) { } -const unsigned* +const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - static const unsigned CalleeSavedRegs[] = { - ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8, - ARM::R7, ARM::R6, ARM::R5, ARM::R4, - - ARM::D15, ARM::D14, ARM::D13, ARM::D12, - ARM::D11, ARM::D10, ARM::D9, ARM::D8, - 0 - }; - - static const unsigned DarwinCalleeSavedRegs[] = { - // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved - // register. - ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4, - ARM::R11, ARM::R10, ARM::R8, + return (STI.isTargetIOS()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; +} - ARM::D15, ARM::D14, ARM::D13, ARM::D12, - ARM::D11, ARM::D10, ARM::D9, ARM::D8, - 0 - }; - return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs; +const uint32_t* +ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { + return (STI.isTargetIOS()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } BitVector ARMBaseRegisterInfo:: @@ -93,7 +79,6 @@ getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); - Reserved.set(ARM::FPSCR); if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) @@ -135,104 +120,6 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, return false; } -const TargetRegisterClass * -ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, - unsigned SubIdx) const { - switch (SubIdx) { - default: return 0; - case ARM::ssub_0: - case ARM::ssub_1: - case ARM::ssub_2: - case ARM::ssub_3: { - // S sub-registers. - if (A->getSize() == 8) { - if (B == &ARM::SPR_8RegClass) - return &ARM::DPR_8RegClass; - assert(B == &ARM::SPRRegClass && "Expecting SPR register class!"); - if (A == &ARM::DPR_8RegClass) - return A; - return &ARM::DPR_VFP2RegClass; - } - - if (A->getSize() == 16) { - if (B == &ARM::SPR_8RegClass) - return &ARM::QPR_8RegClass; - return &ARM::QPR_VFP2RegClass; - } - - if (A->getSize() == 32) { - if (B == &ARM::SPR_8RegClass) - return 0; // Do not allow coalescing! - return &ARM::QQPR_VFP2RegClass; - } - - assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); - return 0; // Do not allow coalescing! - } - case ARM::dsub_0: - case ARM::dsub_1: - case ARM::dsub_2: - case ARM::dsub_3: { - // D sub-registers. - if (A->getSize() == 16) { - if (B == &ARM::DPR_VFP2RegClass) - return &ARM::QPR_VFP2RegClass; - if (B == &ARM::DPR_8RegClass) - return 0; // Do not allow coalescing! - return A; - } - - if (A->getSize() == 32) { - if (B == &ARM::DPR_VFP2RegClass) - return &ARM::QQPR_VFP2RegClass; - if (B == &ARM::DPR_8RegClass) - return 0; // Do not allow coalescing! - return A; - } - - assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); - if (B != &ARM::DPRRegClass) - return 0; // Do not allow coalescing! 
- return A; - } - case ARM::dsub_4: - case ARM::dsub_5: - case ARM::dsub_6: - case ARM::dsub_7: { - // D sub-registers of QQQQ registers. - if (A->getSize() == 64 && B == &ARM::DPRRegClass) - return A; - return 0; // Do not allow coalescing! - } - - case ARM::qsub_0: - case ARM::qsub_1: { - // Q sub-registers. - if (A->getSize() == 32) { - if (B == &ARM::QPR_VFP2RegClass) - return &ARM::QQPR_VFP2RegClass; - if (B == &ARM::QPR_8RegClass) - return 0; // Do not allow coalescing! - return A; - } - - assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); - if (B == &ARM::QPRRegClass) - return A; - return 0; // Do not allow coalescing! - } - case ARM::qsub_2: - case ARM::qsub_3: { - // Q sub-registers of QQQQ registers. - if (A->getSize() == 64 && B == &ARM::QPRRegClass) - return A; - return 0; // Do not allow coalescing! - } - } - return 0; -} - bool ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, SmallVectorImpl<unsigned> &SubIndices, @@ -403,7 +290,7 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, /// getRawAllocationOrder - Returns the register allocation order for a /// specified register class with a target-dependent hint. -ArrayRef<unsigned> +ArrayRef<uint16_t> ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, unsigned HintType, unsigned HintReg, const MachineFunction &MF) const { @@ -412,71 +299,71 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, // of register pairs. // No FP, R9 is available. - static const unsigned GPREven1[] = { + static const uint16_t GPREven1[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10, ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R9, ARM::R11 }; - static const unsigned GPROdd1[] = { + static const uint16_t GPROdd1[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11, ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, ARM::R10 }; // FP is R7, R9 is available. - static const unsigned GPREven2[] = { + static const uint16_t GPREven2[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10, ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R9, ARM::R11 }; - static const unsigned GPROdd2[] = { + static const uint16_t GPROdd2[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11, ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, ARM::R10 }; // FP is R11, R9 is available. - static const unsigned GPREven3[] = { + static const uint16_t GPREven3[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R9 }; - static const unsigned GPROdd3[] = { + static const uint16_t GPROdd3[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9, ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7, ARM::R8 }; // No FP, R9 is not available. - static const unsigned GPREven4[] = { + static const uint16_t GPREven4[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10, ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8, ARM::R11 }; - static const unsigned GPROdd4[] = { + static const uint16_t GPROdd4[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11, ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, ARM::R10 }; // FP is R7, R9 is not available. 
- static const unsigned GPREven5[] = { + static const uint16_t GPREven5[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R10, ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8, ARM::R11 }; - static const unsigned GPROdd5[] = { + static const uint16_t GPROdd5[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R11, ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, ARM::R10 }; // FP is R11, R9 is not available. - static const unsigned GPREven6[] = { + static const uint16_t GPREven6[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 }; - static const unsigned GPROdd6[] = { + static const uint16_t GPROdd6[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 }; @@ -597,11 +484,16 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); if (!EnableBasePointer) return false; - if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) + // When outgoing call frames are so large that we adjust the stack pointer + // around the call, we can no longer use the stack pointer to reach the + // emergency spill slot. + if (needsStackRealignment(MF) && (MFI->hasVarSizedObjects() || + !TFI->hasReservedCallFrame(MF))) return true; // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited @@ -626,13 +518,28 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled. - return (MF.getTarget().Options.RealignStack && !AFI->isThumb1OnlyFunction() && - (!MFI->hasVarSizedObjects() || EnableBasePointer)); + if (!MF.getTarget().Options.RealignStack) + return false; + if (AFI->isThumb1OnlyFunction()) + return false; + // Stack realignment requires a frame pointer. If we already started + // register allocation with frame pointer elimination, it is too late now. + if (!MRI->canReserveReg(FramePtr)) + return false; + // We may also need a base pointer if there are dynamic allocas. + if (!MFI->hasVarSizedObjects()) + return true; + if (!EnableBasePointer) + return false; + // A base pointer is required and allowed. Check that it isn't too late to + // reserve it. 
+ return MRI->canReserveReg(BasePtr); } bool ARMBaseRegisterInfo:: @@ -640,7 +547,7 @@ needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); - bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) || + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttr(Attribute::StackAlignment)); return requiresRealignment && canRealignStack(MF); @@ -666,12 +573,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); - return 0; } unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); - return 0; } unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, @@ -879,7 +784,7 @@ int64_t ARMBaseRegisterInfo:: getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - int64_t InstrOffs = 0;; + int64_t InstrOffs = 0; int Scale = 1; unsigned ImmIdx = 0; switch (AddrMode) { @@ -920,7 +825,6 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { } default: llvm_unreachable("Unsupported addressing mode!"); - break; } return InstrOffs * Scale; @@ -1116,7 +1020,6 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, break; default: llvm_unreachable("Unsupported addressing mode!"); - break; } Offset += getFrameIndexInstrOffset(MI, i); @@ -1158,6 +1061,21 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); + // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the + // call frame setup/destroy instructions have already been eliminated. That + // means the stack pointer cannot be used to access the emergency spill slot + // when !hasReservedCallFrame(). +#ifndef NDEBUG + if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + assert(TFI->hasReservedCallFrame(MF) && + "Cannot use SP to access the emergency spill slot in " + "functions without a reserved call frame"); + assert(!MF.getFrameInfo()->hasVarSizedObjects() && + "Cannot use SP to access the emergency spill slot in " + "functions with variable sized frame objects"); + } +#endif // NDEBUG + // Special handling of dbg_value instructions. if (MI.isDebugValue()) { MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index fee17ff..af79351 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -1,4 +1,4 @@ -//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===// +//===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -35,7 +35,7 @@ namespace ARMRI { /// isARMArea1Register - Returns true if the register is a low register (r0-r7) /// or a stack/pc register that we should push/pop. 
-static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) { +static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { using namespace ARM; switch (Reg) { case R0: case R1: case R2: case R3: @@ -43,25 +43,25 @@ static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) { case LR: case SP: case PC: return true; case R8: case R9: case R10: case R11: - // For darwin we want r7 and lr to be next to each other. - return !isDarwin; + // For iOS we want r7 and lr to be next to each other. + return !isIOS; default: return false; } } -static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) { +static inline bool isARMArea2Register(unsigned Reg, bool isIOS) { using namespace ARM; switch (Reg) { case R8: case R9: case R10: case R11: - // Darwin has this second area. - return isDarwin; + // iOS has this second area. + return isIOS; default: return false; } } -static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) { +static inline bool isARMArea3Register(unsigned Reg, bool isIOS) { using namespace ARM; switch (Reg) { case D15: case D14: case D13: case D12: @@ -94,17 +94,11 @@ protected: public: /// Code Generation virtual methods... - const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const uint32_t *getCallPreservedMask(CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const; - /// getMatchingSuperRegClass - Return a subclass of the specified register - /// class A so that each register in it has a sub-register of the - /// specified sub-register index which is in the specified register class B. - virtual const TargetRegisterClass * - getMatchingSuperRegClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, unsigned Idx) const; - /// canCombineSubRegIndices - Given a register class and a list of /// subregister indices, return true if it's possible to combine the /// subregister indices into one that corresponds to a larger @@ -125,7 +119,7 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const; - ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC, + ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC, unsigned HintType, unsigned HintReg, const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h index 69eddf0..11bd6a4 100644 --- a/lib/Target/ARM/ARMBuildAttrs.h +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -1,4 +1,4 @@ -//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===// +//===-- ARMBuildAttrs.h - ARM Build Attributes ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index ff7db1f..437b4c7 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -1,4 +1,4 @@ -//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===// +//=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index c22a08e..d33364b 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -1,4 +1,4 @@ -//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===// +//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===// // // The 
LLVM Compiler Infrastructure // @@ -164,3 +164,14 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ S9, S10, S11, S12, S13, S14, S15]>>, CCDelegateTo<RetCC_ARM_AAPCS_Common> ]>; + +//===----------------------------------------------------------------------===// +// Callee-saved register lists. +//===----------------------------------------------------------------------===// + +def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, + (sequence "D%u", 15, 8))>; + +// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. +// Also save R7-R4 first to match the stack frame fixed spill areas. +def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 365f0bb..e48d07a 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -74,7 +74,7 @@ namespace { /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. - unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; bool runOnMachineFunction(MachineFunction &MF); @@ -199,6 +199,8 @@ namespace { unsigned Op) const { return 0; } unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op) @@ -421,7 +423,6 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { case ARM_AM::ror: case ARM_AM::rrx: return 3; } - return 0; } /// getMovi32Value - Return binary encoding of operand for movw/movt. If the @@ -550,7 +551,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { switch (MI.getDesc().TSFlags & ARMII::FormMask) { default: { llvm_unreachable("Unhandled instruction encoding format!"); - break; } case ARMII::MiscFrm: if (MI.getOpcode() == ARM::LEApcrelJT) { @@ -559,7 +559,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { break; } llvm_unreachable("Unhandled instruction encoding!"); - break; case ARMII::Pseudo: emitPseudoInstruction(MI); break; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 2039d41..2cdfd1e 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -52,6 +52,7 @@ static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); +// FIXME: This option should be removed once it has received sufficient testing. static cl::opt<bool> AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true), cl::desc("Align constant islands in code")); @@ -194,14 +195,23 @@ namespace { MachineInstr *MI; MachineInstr *CPEMI; MachineBasicBlock *HighWaterMark; + private: unsigned MaxDisp; + public: bool NegOk; bool IsSoImm; + bool KnownAlignment; CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, bool neg, bool soimm) - : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) { + : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm), + KnownAlignment(false) { HighWaterMark = CPEMI->getParent(); } + /// getMaxDisp - Returns the maximum displacement supported by MI. 
+ /// Correct for unknown alignment. + unsigned getMaxDisp() const { + return KnownAlignment ? MaxDisp : MaxDisp - 2; + } }; /// CPUsers - Keep track of all of the machine instructions that use various @@ -299,6 +309,7 @@ namespace { bool FixUpConditionalBr(ImmBranch &Br); bool FixUpUnconditionalBr(ImmBranch &Br); bool UndoLRSpillRestore(); + bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const; bool OptimizeThumb2Instructions(); bool OptimizeThumb2Branches(); bool ReorderThumb2JumpTables(); @@ -308,6 +319,7 @@ namespace { void ComputeBlockSize(MachineBasicBlock *MBB); unsigned GetOffsetOf(MachineInstr *MI) const; + unsigned GetUserOffset(CPUser&) const; void dumpBBs(); void verify(); @@ -316,7 +328,7 @@ namespace { bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, const CPUser &U) { return OffsetIsInRange(UserOffset, TrialOffset, - U.MaxDisp, U.NegOk, U.IsSoImm); + U.getMaxDisp(), U.NegOk, U.IsSoImm); } }; char ARMConstantIslands::ID = 0; @@ -335,11 +347,9 @@ void ARMConstantIslands::verify() { } for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; - unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8); - unsigned CPEOffset = GetOffsetOf(U.CPEMI); - unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset : - UserOffset - CPEOffset; - assert(Disp <= U.MaxDisp || "Constant pool entry out of range!"); + unsigned UserOffset = GetUserOffset(U); + assert(CPEIsInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp(), U.NegOk) && + "Constant pool entry out of range!"); } #endif } @@ -434,7 +444,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) CPChange |= HandleConstantPoolUser(i); if (CPChange && ++NoCPIters > 30) - llvm_unreachable("Constant Island pass failed to converge!"); + report_fatal_error("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); // Clear NewWaterList now. If we split a block for branches, it should @@ -446,7 +456,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) BRChange |= FixUpImmediateBr(ImmBranches[i]); if (BRChange && ++NoBRIters > 30) - llvm_unreachable("Branch Fix Up pass failed to converge!"); + report_fatal_error("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); if (!CPChange && !BRChange) @@ -536,7 +546,7 @@ ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // Ensure that future entries with higher alignment get inserted before // CPEMI. This is bucket sort with iterators. - for (unsigned a = LogAlign + 1; a < MaxAlign; ++a) + for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) if (InsPoint[a] == InsAt) InsPoint[a] = CPEMI; @@ -545,7 +555,8 @@ ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { CPEs.push_back(CPEntry(CPEMI, i)); CPEntries.push_back(CPEs); ++NumCPEs; - DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n"); + DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " + << Size << ", align = " << Align <<'\n'); } DEBUG(BB->dump()); } @@ -719,7 +730,6 @@ InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { switch (Opc) { default: llvm_unreachable("Unknown addressing mode for CP reference!"); - break; // Taking the address of a CP entry. case ARM::LEApcrel: @@ -795,6 +805,9 @@ void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) { // The actual size may be smaller, but still a multiple of the instr size. 
if (I->isInlineAsm()) BBI.Unalign = isThumb ? 1 : 2; + // Also consider instructions that may be shrunk later. + else if (isThumb && mayOptimizeThumb2Instruction(I)) + BBI.Unalign = 1; } // tBR_JTr contains a .align 2 directive. @@ -816,11 +829,11 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { unsigned Offset = BBInfo[MBB->getNumber()].Offset; // Sum instructions before MI in MBB. - for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) { + for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - if (&*I == MI) return Offset; Offset += TII->GetInstSizeInBytes(I); } + return Offset; } /// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB @@ -923,34 +936,39 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { return NewBB; } +/// GetUserOffset - Compute the offset of U.MI as seen by the hardware +/// displacement computation. Update U.KnownAlignment to match its current +/// basic block location. +unsigned ARMConstantIslands::GetUserOffset(CPUser &U) const { + unsigned UserOffset = GetOffsetOf(U.MI); + const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()]; + unsigned KnownBits = BBI.internalKnownBits(); + + // The value read from PC is offset from the actual instruction address. + UserOffset += (isThumb ? 4 : 8); + + // Because of inline assembly, we may not know the alignment (mod 4) of U.MI. + // Make sure U.getMaxDisp() returns a constrained range. + U.KnownAlignment = (KnownBits >= 2); + + // On Thumb, offsets==2 mod 4 are rounded down by the hardware for + // purposes of the displacement computation; compensate for that here. + // For unknown alignments, getMaxDisp() constrains the range instead. + if (isThumb && U.KnownAlignment) + UserOffset &= ~3u; + + return UserOffset; +} + /// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool /// reference) is within MaxDisp of TrialOffset (a proposed location of a /// constant pool entry). +/// UserOffset is computed by GetUserOffset above to include PC adjustments. If +/// the mod 4 alignment of UserOffset is not known, the uncertainty must be +/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that. bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK, bool IsSoImm) { - // On Thumb offsets==2 mod 4 are rounded down by the hardware for - // purposes of the displacement computation; compensate for that here. - // Effectively, the valid range of displacements is 2 bytes smaller for such - // references. - unsigned TotalAdj = 0; - if (isThumb && UserOffset%4 !=0) { - UserOffset -= 2; - TotalAdj = 2; - } - // CPEs will be rounded up to a multiple of 4. - if (isThumb && TrialOffset%4 != 0) { - TrialOffset += 2; - TotalAdj += 2; - } - - // In Thumb2 mode, later branch adjustments can shift instructions up and - // cause alignment change. In the worst case scenario this can cause the - // user's effective address to be subtracted by 2 and the CPE's address to - // be plus 2. - if (isThumb2 && TotalAdj != 4) - MaxDisp -= (4 - TotalAdj); - if (UserOffset <= TrialOffset) { // User before the Trial. 
if (TrialOffset - UserOffset <= MaxDisp) @@ -1049,14 +1067,22 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { #endif // NDEBUG void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { - for(unsigned i = BB->getNumber() + 1, e = MF->getNumBlockIDs(); i < e; ++i) { + unsigned BBNum = BB->getNumber(); + for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { // Get the offset and known bits at the end of the layout predecessor. // Include the alignment of the current block. unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); - // This is where block i begins. + // This is where block i begins. Stop if the offset is already correct, + // and we have updated 2 blocks. This is the maximum number of blocks + // changed before calling this function. + if (i > BBNum + 2 && + BBInfo[i].Offset == Offset && + BBInfo[i].KnownBits == KnownBits) + break; + BBInfo[i].Offset = Offset; BBInfo[i].KnownBits = KnownBits; } @@ -1092,7 +1118,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) MachineInstr *CPEMI = U.CPEMI; // Check to see if the CPE is already in-range. - if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) { + if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, true)) { DEBUG(dbgs() << "In range\n"); return 1; } @@ -1107,7 +1133,8 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Removing CPEs can leave empty entries, skip if (CPEs[i].CPEMI == NULL) continue; - if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) { + if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), + U.NegOk)) { DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); // Point the CPUser node to the replacement @@ -1208,8 +1235,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // If the block does not end in an unconditional branch already, and if the // end of the block is within range, make new water there. (The addition // below is for the unconditional branch we will be adding: 4 bytes on ARM + - // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for - // inside OffsetIsInRange. + // Thumb2, 2 on Thumb1. if (BBHasFallthrough(UserMBB)) { // Size of branch to insert. unsigned Delta = isThumb1 ? 2 : 4; @@ -1262,7 +1288,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); unsigned KnownBits = UserBBI.internalKnownBits(); unsigned UPad = UnknownPadding(LogAlign, KnownBits); - unsigned BaseInsertOffset = UserOffset + U.MaxDisp; + unsigned BaseInsertOffset = UserOffset + U.getMaxDisp(); DEBUG(dbgs() << format("Split in middle of big block before %#x", BaseInsertOffset)); @@ -1343,9 +1369,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { MachineInstr *CPEMI = U.CPEMI; unsigned CPI = CPEMI->getOperand(1).getIndex(); unsigned Size = CPEMI->getOperand(2).getImm(); - // Compute this only once, it's expensive. The 4 or 8 is the value the - // hardware keeps in the PC. - unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); + // Compute this only once, it's expensive. + unsigned UserOffset = GetUserOffset(U); // See if the current entry is within range, or there is a clone of it // in range. 
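Editorial note, not part of the patch: the constant-island hunks above replace the scattered "+/-2" fudging with one rule — GetUserOffset() folds the 4/8-byte PC read-ahead and the Thumb rounding into the user offset, while CPUser::getMaxDisp() gives up 2 bytes of range whenever the user's offset mod 4 is unknown (e.g. after inline asm). A minimal stand-alone sketch of the combined check, using hypothetical names and assuming UserOffset already includes the PC read-ahead:

// Sketch only; userOffsetInRange() is an illustrative stand-in for the
// GetUserOffset()/getMaxDisp()/OffsetIsInRange() split introduced above.
static bool userOffsetInRange(unsigned UserOffset, unsigned CPEOffset,
                              unsigned MaxDisp, bool NegOk,
                              bool IsThumb, bool KnownAlignment) {
  // Unknown mod-4 alignment costs 2 bytes of range instead of an
  // ad-hoc offset adjustment.
  unsigned EffMaxDisp = KnownAlignment ? MaxDisp : MaxDisp - 2;
  // On Thumb the hardware rounds a PC value that is 2 mod 4 down to a
  // multiple of 4 before adding the displacement.
  if (IsThumb && KnownAlignment)
    UserOffset &= ~3u;
  if (UserOffset <= CPEOffset)
    return CPEOffset - UserOffset <= EffMaxDisp;   // CPE after the user
  return NegOk && UserOffset - CPEOffset <= EffMaxDisp; // CPE before the user
}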
@@ -1652,6 +1677,25 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } +// mayOptimizeThumb2Instruction - Returns true if OptimizeThumb2Instructions +// below may shrink MI. +bool +ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const { + switch(MI->getOpcode()) { + // OptimizeThumb2Instructions. + case ARM::t2LEApcrel: + case ARM::t2LDRpci: + // OptimizeThumb2Branches. + case ARM::t2B: + case ARM::t2Bcc: + case ARM::tBcc: + // OptimizeThumb2JumpTables. + case ARM::t2BR_JT: + return true; + } + return false; +} + bool ARMConstantIslands::OptimizeThumb2Instructions() { bool MadeChange = false; @@ -1683,8 +1727,13 @@ bool ARMConstantIslands::OptimizeThumb2Instructions() { if (!NewOpc) continue; - unsigned UserOffset = GetOffsetOf(U.MI) + 4; + unsigned UserOffset = GetUserOffset(U); unsigned MaxOffs = ((1 << Bits) - 1) * Scale; + + // Be conservative with inline asm. + if (!U.KnownAlignment) + MaxOffs -= 2; + // FIXME: Check if offset is multiple of scale if scale is not 4. if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { U.MI->setDesc(TII->get(NewOpc)); @@ -1741,6 +1790,11 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { if (Opcode != ARM::tBcc) continue; + // If the conditional branch doesn't kill CPSR, then CPSR can be liveout + // so this transformation is not safe. + if (!Br.MI->killsRegister(ARM::CPSR)) + continue; + NewOpc = 0; unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg); diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 9576283..fa3226e 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -1,4 +1,4 @@ -//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===// +//===-- ARMConstantPoolValue.cpp - ARM constantpool value -----------------===// // // The LLVM Compiler Infrastructure // @@ -48,7 +48,6 @@ ARMConstantPoolValue::~ARMConstantPoolValue() {} const char *ARMConstantPoolValue::getModifierText() const { switch (Modifier) { - default: llvm_unreachable("Unknown modifier!"); // FIXME: Are these case sensitive? It'd be nice to lower-case all the // strings if that's legal. 
case ARMCP::no_modifier: return "none"; @@ -58,12 +57,12 @@ const char *ARMConstantPoolValue::getModifierText() const { case ARMCP::GOTTPOFF: return "gottpoff"; case ARMCP::TPOFF: return "tpoff"; } + llvm_unreachable("Unknown modifier!"); } int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - assert(false && "Shouldn't be calling this directly!"); - return -1; + llvm_unreachable("Shouldn't be calling this directly!"); } void diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 0d0def3..6b98d44 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -1,4 +1,4 @@ -//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===// +//===-- ARMConstantPoolValue.h - ARM constantpool value ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp index 51e68b4..f671317 100644 --- a/lib/Target/ARM/ARMELFWriterInfo.cpp +++ b/lib/Target/ARM/ARMELFWriterInfo.cpp @@ -41,43 +41,38 @@ unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { case ARM::reloc_arm_machine_cp_entry: case ARM::reloc_arm_jt_base: case ARM::reloc_arm_pic_jt: - assert(0 && "unsupported ARM relocation type"); break; - - case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break; - case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break; - case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break; + llvm_unreachable("unsupported ARM relocation type"); + + case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; + case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; + case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; default: - llvm_unreachable("unknown ARM relocation type"); break; + llvm_unreachable("unknown ARM relocation type"); } - return 0; } long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, long int Modifier) const { - assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not " + "implemented"); } unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented"); } bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented"); - return 1; + llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented"); } unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - assert(0 && - "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " + "implemented"); } long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset, unsigned RelOffset, unsigned RelTy) const { - assert(0 && - "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " + "implemented"); } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 01d772d..c4ab99d 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1,4 +1,4 @@ -//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=// +//===-- ARMExpandPseudoInsts.cpp 
- Expand pseudo instructions -------------===// // // The LLVM Compiler Infrastructure // @@ -148,25 +148,16 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, -{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q16PseudoWB_fixed, ARM::VLD1q16wb_fixed,true,false,false,SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q16PseudoWB_register, ARM::VLD1q16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q32PseudoWB_fixed, ARM::VLD1q32wb_fixed,true,false, false,SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q32PseudoWB_register, ARM::VLD1q32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, false, SingleSpc, 2, 1 ,false}, -{ ARM::VLD1q64PseudoWB_fixed, ARM::VLD1q64wb_fixed,true,false, false,SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q64PseudoWB_register, ARM::VLD1q64wb_register, true, true, true, SingleSpc, 2, 1 ,false}, -{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD1q8PseudoWB_fixed, ARM::VLD1q8wb_fixed,true,false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD1q8PseudoWB_register, ARM::VLD1q8wb_register,true,true, true,SingleSpc,2,8,false}, - -{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,true}, -{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, true, SingleSpc, 2, 4,true}, -{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,true}, -{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, true, SingleSpc, 2, 2,true}, -{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,true}, -{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, true, SingleSpc, 2, 8,true}, + +{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false}, +{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false}, +{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true, SingleSpc, 2, 4,false}, +{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,false}, +{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, +{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true, SingleSpc, 2, 2,false}, +{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,false}, +{ ARM::VLD2DUPd8PseudoWB_fixed, ARM::VLD2DUPd8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, +{ ARM::VLD2DUPd8PseudoWB_register, ARM::VLD2DUPd8wb_register, true, true, true, SingleSpc, 2, 8,false}, { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, @@ -179,16 +170,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, -{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, 
-{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false}, - { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, @@ -283,19 +264,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, -{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q16PseudoWB_register, ARM::VST1q16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q32PseudoWB_fixed, ARM::VST1q32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q32PseudoWB_register, ARM::VST1q32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q64PseudoWB_fixed, ARM::VST1q64wb_fixed, false, true, false, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q64PseudoWB_register, ARM::VST1q64wb_register, false, true, true, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST1q8PseudoWB_fixed, ARM::VST1q8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST1q8PseudoWB_register, ARM::VST1q8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, - { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, @@ -307,16 +275,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, -{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, 
false, true, true, SingleSpc, 2, 8 ,false}, - { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, { ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, @@ -631,6 +589,8 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); + // Transfer memoperands. + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); } @@ -837,7 +797,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::eh_sjlj_dispatchsetup: { + case ARM::Int_eh_sjlj_dispatchsetup: + case ARM::Int_eh_sjlj_dispatchsetup_nofp: + case ARM::tInt_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); @@ -1024,6 +986,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); return true; } @@ -1054,6 +1017,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); return true; } @@ -1085,33 +1049,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VLD1q8Pseudo: - case ARM::VLD1q16Pseudo: - case ARM::VLD1q32Pseudo: - case ARM::VLD1q64Pseudo: - case ARM::VLD1q8PseudoWB_register: - case ARM::VLD1q16PseudoWB_register: - case ARM::VLD1q32PseudoWB_register: - case ARM::VLD1q64PseudoWB_register: - case ARM::VLD1q8PseudoWB_fixed: - case ARM::VLD1q16PseudoWB_fixed: - case ARM::VLD1q32PseudoWB_fixed: - case ARM::VLD1q64PseudoWB_fixed: - case ARM::VLD2d8Pseudo: - case ARM::VLD2d16Pseudo: - case ARM::VLD2d32Pseudo: case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8PseudoWB_fixed: - case ARM::VLD2d16PseudoWB_fixed: - case ARM::VLD2d32PseudoWB_fixed: case ARM::VLD2q8PseudoWB_fixed: case ARM::VLD2q16PseudoWB_fixed: case ARM::VLD2q32PseudoWB_fixed: - case ARM::VLD2d8PseudoWB_register: - case ARM::VLD2d16PseudoWB_register: - case ARM::VLD2d32PseudoWB_register: case ARM::VLD2q8PseudoWB_register: case ARM::VLD2q16PseudoWB_register: case ARM::VLD2q32PseudoWB_register: @@ -1159,9 +1102,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8Pseudo_UPD: - case ARM::VLD2DUPd16Pseudo_UPD: - case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD2DUPd8PseudoWB_fixed: + case ARM::VLD2DUPd16PseudoWB_fixed: + case ARM::VLD2DUPd32PseudoWB_fixed: + case ARM::VLD2DUPd8PseudoWB_register: + case ARM::VLD2DUPd16PseudoWB_register: + case ARM::VLD2DUPd32PseudoWB_register: case ARM::VLD3DUPd8Pseudo: case ARM::VLD3DUPd16Pseudo: case ARM::VLD3DUPd32Pseudo: @@ -1177,33 +1123,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandVLD(MBBI); return true; - case ARM::VST1q8Pseudo: - case ARM::VST1q16Pseudo: - case ARM::VST1q32Pseudo: - case ARM::VST1q64Pseudo: - case 
ARM::VST1q8PseudoWB_fixed: - case ARM::VST1q16PseudoWB_fixed: - case ARM::VST1q32PseudoWB_fixed: - case ARM::VST1q64PseudoWB_fixed: - case ARM::VST1q8PseudoWB_register: - case ARM::VST1q16PseudoWB_register: - case ARM::VST1q32PseudoWB_register: - case ARM::VST1q64PseudoWB_register: - case ARM::VST2d8Pseudo: - case ARM::VST2d16Pseudo: - case ARM::VST2d32Pseudo: case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case ARM::VST2q32Pseudo: - case ARM::VST2d8PseudoWB_fixed: - case ARM::VST2d16PseudoWB_fixed: - case ARM::VST2d32PseudoWB_fixed: case ARM::VST2q8PseudoWB_fixed: case ARM::VST2q16PseudoWB_fixed: case ARM::VST2q32PseudoWB_fixed: - case ARM::VST2d8PseudoWB_register: - case ARM::VST2d16PseudoWB_register: - case ARM::VST2d32PseudoWB_register: case ARM::VST2q8PseudoWB_register: case ARM::VST2q16PseudoWB_register: case ARM::VST2q32PseudoWB_register: @@ -1321,15 +1246,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true; case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; - case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; } - - return false; } bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a98dfc3..818b202 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -157,14 +157,16 @@ class ARMFastISel : public FastISel { bool SelectLoad(const Instruction *I); bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); + bool SelectIndirectBr(const Instruction *I); bool SelectCmp(const Instruction *I); bool SelectFPExt(const Instruction *I); bool SelectFPTrunc(const Instruction *I); - bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode); - bool SelectSIToFP(const Instruction *I); - bool SelectFPToSI(const Instruction *I); - bool SelectSDiv(const Instruction *I); - bool SelectSRem(const Instruction *I); + bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); + bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode); + bool SelectIToFP(const Instruction *I, bool isSigned); + bool SelectFPToI(const Instruction *I, bool isSigned); + bool SelectDiv(const Instruction *I, bool isSigned); + bool SelectRem(const Instruction *I, bool isSigned); bool SelectCall(const Instruction *I, const char *IntrMemName); bool SelectIntrinsicCall(const IntrinsicInst &I); bool SelectSelect(const Instruction *I); @@ -299,10 +301,10 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -319,11 +321,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { 
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill)); @@ -342,12 +344,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) @@ -366,11 +368,11 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm)); @@ -388,11 +390,11 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm)); @@ -411,12 +413,12 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) @@ -434,10 +436,10 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addImm(Imm)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addImm(Imm)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -453,10 +455,10 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addImm(Imm1).addImm(Imm2)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addImm(Imm1).addImm(Imm2)); 
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -473,9 +475,10 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(TargetRegisterInfo::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(TargetOpcode::COPY), ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill), Idx)); + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx)); return ResultReg; } @@ -486,7 +489,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(ARM::VMOVRS), MoveReg) + TII.get(ARM::VMOVSR), MoveReg) .addReg(SrcReg)); return MoveReg; } @@ -496,7 +499,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(ARM::VMOVSR), MoveReg) + TII.get(ARM::VMOVRS), MoveReg) .addReg(SrcReg)); return MoveReg; } @@ -617,40 +620,65 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { // TODO: Need more magic for ARM PIC. if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0; - // MachineConstantPool wants an explicit alignment. - unsigned Align = TD.getPrefTypeAlignment(GV->getType()); - if (Align == 0) { - // TODO: Figure out if this is correct. - Align = TD.getTypeAllocSize(GV->getType()); - } - - // Grab index. - unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); - unsigned Id = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, - ARMCP::CPValue, - PCAdj); - unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); - - // Load value. - MachineInstrBuilder MIB; unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); - if (isThumb2) { - unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) - .addConstantPoolIndex(Idx); - if (RelocM == Reloc::PIC_) - MIB.addImm(Id); + + // Use movw+movt when possible, it avoids constant pool entries. + // Darwin targets don't support movt with Reloc::Static, see + // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support + // static movt relocations. + if (Subtarget->useMovt() && + Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) { + unsigned Opc; + switch (RelocM) { + case Reloc::PIC_: + Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel; + break; + case Reloc::DynamicNoPIC: + Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn; + break; + default: + Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm; + break; + } + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + DestReg).addGlobalAddress(GV)); } else { - // The extra immediate is for addrmode2. - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), - DestReg) - .addConstantPoolIndex(Idx) - .addImm(0); + // MachineConstantPool wants an explicit alignment. + unsigned Align = TD.getPrefTypeAlignment(GV->getType()); + if (Align == 0) { + // TODO: Figure out if this is correct. + Align = TD.getTypeAllocSize(GV->getType()); + } + + // Grab index. + unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : + (Subtarget->isThumb() ? 
4 : 8); + unsigned Id = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, + ARMCP::CPValue, + PCAdj); + unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); + + // Load value. + MachineInstrBuilder MIB; + if (isThumb2) { + unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addConstantPoolIndex(Idx); + if (RelocM == Reloc::PIC_) + MIB.addImm(Id); + } else { + // The extra immediate is for addrmode2. + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), + DestReg) + .addConstantPoolIndex(Idx) + .addImm(0); + } + AddOptionalDefs(MIB); } - AddOptionalDefs(MIB); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + MachineInstrBuilder MIB; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -700,7 +728,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { // This will get lowered later into the correct offsets and registers // via rewriteXFrameIndex. if (SI != FuncInfo.StaticAllocaMap.end()) { - TargetRegisterClass* RC = TLI.getRegClassFor(VT); + const TargetRegisterClass* RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -844,16 +872,6 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { } } - // Materialize the global variable's address into a reg which can - // then be used later to load the variable. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { - unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType())); - if (Tmp == 0) return false; - - Addr.Base.Reg = Tmp; - return true; - } - // Try to get this in a register if nothing else has worked. if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj); return Addr.Base.Reg != 0; @@ -865,9 +883,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { bool needsLowering = false; switch (VT.getSimpleVT().SimpleTy) { - default: - assert(false && "Unhandled load/store type!"); - break; + default: llvm_unreachable("Unhandled load/store type!"); case MVT::i1: case MVT::i8: case MVT::i16: @@ -895,8 +911,8 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { // put the alloca address into a register, set the base type back to // register and continue. This should almost never happen. if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { - TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass : - ARM::GPRRegisterClass; + const TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass + : ARM::GPRRegisterClass; unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -971,7 +987,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, unsigned Opc; bool useAM3 = false; bool needVMOV = false; - TargetRegisterClass *RC; + const TargetRegisterClass *RC; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. 
default: return false; @@ -1336,6 +1352,16 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { return true; } +bool ARMFastISel::SelectIndirectBr(const Instruction *I) { + unsigned AddrReg = getRegForValue(I->getOperand(0)); + if (AddrReg == 0) return false; + + unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + .addReg(AddrReg)); + return true; +} + bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); @@ -1416,14 +1442,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // We have i1, i8, or i16, we need to either zero extend or sign extend. if (needsExt) { - unsigned ResultReg; - ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); - if (ResultReg == 0) return false; - SrcReg1 = ResultReg; + SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); + if (SrcReg1 == 0) return false; if (!UseImm) { - ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); - if (ResultReg == 0) return false; - SrcReg2 = ResultReg; + SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); + if (SrcReg2 == 0) return false; } } @@ -1467,8 +1490,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { // Now set a register based on the comparison. Explicitly set the predicates // here. unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; - TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass - : ARM::GPRRegisterClass; + const TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass + : ARM::GPRRegisterClass; unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = TargetMaterializeConstant(Zero); @@ -1520,7 +1543,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) { return true; } -bool ARMFastISel::SelectSIToFP(const Instruction *I) { +bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { // Make sure we have VFP. if (!Subtarget->hasVFP2()) return false; @@ -1540,9 +1563,9 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { // Handle sign-extension. if (SrcVT == MVT::i16 || SrcVT == MVT::i8) { EVT DestVT = MVT::i32; - unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, /*isZExt*/ false); - if (ResultReg == 0) return false; - SrcReg = ResultReg; + SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, + /*isZExt*/!isSigned); + if (SrcReg == 0) return false; } // The conversion routine works on fp-reg to fp-reg and the operand above @@ -1551,8 +1574,8 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { if (FP == 0) return false; unsigned Opc; - if (Ty->isFloatTy()) Opc = ARM::VSITOS; - else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; + if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS; + else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD; else return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); @@ -1563,7 +1586,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { return true; } -bool ARMFastISel::SelectFPToSI(const Instruction *I) { +bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { // Make sure we have VFP. if (!Subtarget->hasVFP2()) return false; @@ -1577,11 +1600,11 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) { unsigned Opc; Type *OpTy = I->getOperand(0)->getType(); - if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; - else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; + if (OpTy->isFloatTy()) Opc = isSigned ? 
ARM::VTOSIZS : ARM::VTOUIZS; + else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD; else return false; - // f64->s32 or f32->s32 both need an intermediate f32 reg. + // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg. unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) @@ -1656,7 +1679,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { return true; } -bool ARMFastISel::SelectSDiv(const Instruction *I) { +bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) { MVT VT; Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) @@ -1670,21 +1693,21 @@ bool ARMFastISel::SelectSDiv(const Instruction *I) { // Otherwise emit a libcall. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i8) - LC = RTLIB::SDIV_I8; + LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8; else if (VT == MVT::i16) - LC = RTLIB::SDIV_I16; + LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16; else if (VT == MVT::i32) - LC = RTLIB::SDIV_I32; + LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32; else if (VT == MVT::i64) - LC = RTLIB::SDIV_I64; + LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64; else if (VT == MVT::i128) - LC = RTLIB::SDIV_I128; + LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); return ARMEmitLibcall(I, LC); } -bool ARMFastISel::SelectSRem(const Instruction *I) { +bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) { MVT VT; Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) @@ -1692,21 +1715,59 @@ bool ARMFastISel::SelectSRem(const Instruction *I) { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i8) - LC = RTLIB::SREM_I8; + LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8; else if (VT == MVT::i16) - LC = RTLIB::SREM_I16; + LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16; else if (VT == MVT::i32) - LC = RTLIB::SREM_I32; + LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32; else if (VT == MVT::i64) - LC = RTLIB::SREM_I64; + LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64; else if (VT == MVT::i128) - LC = RTLIB::SREM_I128; + LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); return ARMEmitLibcall(I, LC); } -bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { +bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { + EVT DestVT = TLI.getValueType(I->getType(), true); + + // We can get here in the case when we have a binary operation on a non-legal + // type and the target independent selector doesn't know how to handle it. + if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) + return false; + + unsigned Opc; + switch (ISDOpcode) { + default: return false; + case ISD::ADD: + Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr; + break; + case ISD::OR: + Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr; + break; + case ISD::SUB: + Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr; + break; + } + + unsigned SrcReg1 = getRegForValue(I->getOperand(0)); + if (SrcReg1 == 0) return false; + + // TODO: Often the 2nd operand is an immediate, which can be encoded directly + // in the instruction, rather then materializing the value in a register. 
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1)); + if (SrcReg2 == 0) return false; + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(SrcReg1).addReg(SrcReg2)); + UpdateValueMap(I, ResultReg); + return true; +} + +bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { EVT VT = TLI.getValueType(I->getType(), true); // We can get here in the case when we want to use NEON for our fp @@ -1814,10 +1875,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, case CCValAssign::Full: break; case CCValAssign::SExt: { MVT DestVT = VA.getLocVT(); - unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, - /*isZExt*/false); - assert (ResultReg != 0 && "Failed to emit a sext"); - Arg = ResultReg; + Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false); + assert (Arg != 0 && "Failed to emit a sext"); ArgVT = DestVT; break; } @@ -1825,10 +1884,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, // Intentional fall-through. Handle AExt and ZExt. case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); - unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, - /*isZExt*/true); - assert (ResultReg != 0 && "Failed to emit a sext"); - Arg = ResultReg; + Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); + assert (Arg != 0 && "Failed to emit a sext"); ArgVT = DestVT; break; } @@ -1898,7 +1955,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // For this move we copy into two registers and then move into the // double fp reg we want. EVT DestVT = RVLocs[0].getValVT(); - TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); + const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); unsigned ResultReg = createResultReg(DstRC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::VMOVDRR), ResultReg) @@ -1918,7 +1975,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) CopyVT = MVT::i32; - TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); + const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); unsigned ResultReg = createResultReg(DstRC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), @@ -1980,15 +2037,14 @@ bool ARMFastISel::SelectRet(const Instruction *I) { if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) return false; - if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) - return false; - assert(DestVT == MVT::i32 && "ARM should always ext to i32"); - bool isZExt = Outs[0].Flags.isZExt(); - unsigned ResultReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, isZExt); - if (ResultReg == 0) return false; - SrcReg = ResultReg; + // Perform extension if flagged as either zext or sext. Otherwise, do + // nothing. + if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) { + SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt()); + if (SrcReg == 0) return false; + } } // Make the copy. @@ -2012,12 +2068,12 @@ bool ARMFastISel::SelectRet(const Instruction *I) { unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - // Darwin needs the r9 versions of the opcodes. - bool isDarwin = Subtarget->isTargetDarwin(); + // iOS needs the r9 versions of the opcodes. + bool isiOS = Subtarget->isTargetIOS(); if (isThumb2) { - return isDarwin ? ARM::tBLr9 : ARM::tBL; + return isiOS ? 
ARM::tBLr9 : ARM::tBL; } else { - return isDarwin ? ARM::BLr9 : ARM::BL; + return isiOS ? ARM::BLr9 : ARM::BL; } } @@ -2076,7 +2132,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for darwin, BL otherwise. + // Issue the call, BLr9 for iOS, BL otherwise. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); @@ -2095,6 +2151,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + // Finish off the call including any return values. SmallVector<unsigned, 4> UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; @@ -2147,10 +2207,11 @@ bool ARMFastISel::SelectCall(const Instruction *I, SmallVector<unsigned, 8> ArgRegs; SmallVector<MVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; - Args.reserve(CS.arg_size()); - ArgRegs.reserve(CS.arg_size()); - ArgVTs.reserve(CS.arg_size()); - ArgFlags.reserve(CS.arg_size()); + unsigned arg_size = CS.arg_size(); + Args.reserve(arg_size); + ArgRegs.reserve(arg_size); + ArgVTs.reserve(arg_size); + ArgFlags.reserve(arg_size); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { // If we're lowering a memory intrinsic instead of a regular call, skip the @@ -2197,7 +2258,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for darwin, BL otherwise. + // Issue the call, BLr9 for iOS, BL otherwise. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); @@ -2226,6 +2287,10 @@ bool ARMFastISel::SelectCall(const Instruction *I, for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + // Finish off the call including any return values. 
SmallVector<unsigned, 4> UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; @@ -2260,9 +2325,10 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len) bool RV; unsigned ResultReg; RV = ARMEmitLoad(VT, ResultReg, Src); - assert (RV = true && "Should be able to handle this load."); + assert (RV == true && "Should be able to handle this load."); RV = ARMEmitStore(VT, ResultReg, Dest); - assert (RV = true && "Should be able to handle this store."); + assert (RV == true && "Should be able to handle this store."); + (void)RV; unsigned Size = VT.getSizeInBits()/8; Len -= Size; @@ -2325,7 +2391,6 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return SelectCall(&I, "memset"); } } - return false; } bool ARMFastISel::SelectTrunc(const Instruction *I) { @@ -2427,6 +2492,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return SelectStore(I); case Instruction::Br: return SelectBranch(I); + case Instruction::IndirectBr: + return SelectIndirectBr(I); case Instruction::ICmp: case Instruction::FCmp: return SelectCmp(I); @@ -2435,19 +2502,33 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::FPTrunc: return SelectFPTrunc(I); case Instruction::SIToFP: - return SelectSIToFP(I); + return SelectIToFP(I, /*isSigned*/ true); + case Instruction::UIToFP: + return SelectIToFP(I, /*isSigned*/ false); case Instruction::FPToSI: - return SelectFPToSI(I); + return SelectFPToI(I, /*isSigned*/ true); + case Instruction::FPToUI: + return SelectFPToI(I, /*isSigned*/ false); + case Instruction::Add: + return SelectBinaryIntOp(I, ISD::ADD); + case Instruction::Or: + return SelectBinaryIntOp(I, ISD::OR); + case Instruction::Sub: + return SelectBinaryIntOp(I, ISD::SUB); case Instruction::FAdd: - return SelectBinaryOp(I, ISD::FADD); + return SelectBinaryFPOp(I, ISD::FADD); case Instruction::FSub: - return SelectBinaryOp(I, ISD::FSUB); + return SelectBinaryFPOp(I, ISD::FSUB); case Instruction::FMul: - return SelectBinaryOp(I, ISD::FMUL); + return SelectBinaryFPOp(I, ISD::FMUL); case Instruction::SDiv: - return SelectSDiv(I); + return SelectDiv(I, /*isSigned*/ true); + case Instruction::UDiv: + return SelectDiv(I, /*isSigned*/ false); case Instruction::SRem: - return SelectSRem(I); + return SelectRem(I, /*isSigned*/ true); + case Instruction::URem: + return SelectRem(I, /*isSigned*/ false); case Instruction::Call: if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) return SelectIntrinsicCall(*II); @@ -2514,12 +2595,12 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, namespace llvm { llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { - // Completely untested on non-darwin. + // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); // Darwin and thumb1 only for now. 
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); - if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() && + if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() && !DisableARMFastISel) return new ARMFastISel(funcInfo); return 0; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 06944b1..0fd6025 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====// +//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -16,23 +16,33 @@ #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<bool> +SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), + cl::desc("Align ARM NEON spills in prolog and epilog")); + +static MachineBasicBlock::iterator +skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs); + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - // Mac OS X requires FP not to be clobbered for backtracing purpose. - if (STI.isTargetDarwin()) + // iOS requires FP not to be clobbered for backtracing purpose. + if (STI.isTargetIOS()) return true; const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -71,7 +81,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { +static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { for (unsigned i = 0; CSRegs[i]; ++i) if (Reg == CSRegs[i]) return true; @@ -80,7 +90,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, - const unsigned *CSRegs) { + const uint16_t *CSRegs) { // Integer spill area is handled with "pop". if (MI->getOpcode() == ARM::LDMIA_RET || MI->getOpcode() == ARM::t2LDMIA_RET || @@ -139,6 +149,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; + int D8SpillFI = 0; // Allocate the vararg register save area. This is not counted in NumBytes. 
if (VARegSaveSize) @@ -172,7 +183,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { + if (STI.isTargetIOS()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { @@ -181,8 +192,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } break; default: - AFI->addDPRCalleeSavedAreaFrame(FI); - DPRCSSize += 8; + // This is a DPR. Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) { + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } } } @@ -190,8 +206,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (GPRCS1Size > 0) MBBI++; // Set FP to point to the stack slot that contains the previous FP. - // For Darwin, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not Darwin, all the callee-saved registers go + // For iOS, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not iOS, all the callee-saved registers go // into spill area 1, including the FP in R11. In either case, it is // now safe to emit this assignment. bool HasFP = hasFP(MF); @@ -227,7 +243,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI++; } - NumBytes = DPRCSOffset; + // Move past the aligned DPRCS2 area. + if (AFI->getNumAlignedDPRCS2Regs() > 0) { + MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs()); + // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and + // leaves the stack pointer pointing to the DPRCS2 area. + // + // Adjust NumBytes to represent the stack slots below the DPRCS2 area. + NumBytes += MFI->getObjectOffset(D8SpillFI); + } else + NumBytes = DPRCSOffset; + if (NumBytes) { // Adjust SP after all the callee-save spills. emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, @@ -254,7 +280,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // If we need dynamic stack realignment, do it here. Be paranoid and make // sure if we also have VLAs, we have a base pointer for frame access. - if (RegInfo->needsStackRealignment(MF)) { + // If aligned NEON registers were spilled, the stack has already been + // realigned. + if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { @@ -331,7 +359,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; @@ -355,7 +383,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, ARMCC::AL, 0, TII); else { // It's not possible to restore SP from FP in a single instruction. - // For Darwin, this looks like: + // For iOS, this looks like: // mov sp, r7 // sub sp, #24 // This is bad, if an interrupt is taken after the mov, sp is in an @@ -471,6 +499,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, else if (AFI->isDPRCalleeSavedAreaFrame(FI)) return Offset - AFI->getDPRCalleeSavedAreaOffset(); + // SP can move around if there are allocas. We may also lose track of SP + // when emergency spilling inside a non-reserved call frame setup. 
+ bool hasMovingSP = MFI->hasVarSizedObjects() || !hasReservedCallFrame(MF); + // When dynamically realigning the stack, use the frame pointer for // parameters, and the stack/base pointer for locals. if (RegInfo->needsStackRealignment(MF)) { @@ -478,7 +510,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, if (isFixed) { FrameReg = RegInfo->getFrameRegister(MF); Offset = FPOffset; - } else if (MFI->hasVarSizedObjects()) { + } else if (hasMovingSP) { assert(RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!"); FrameReg = RegInfo->getBaseRegister(); @@ -490,11 +522,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, if (hasFP(MF) && AFI->hasStackFrame()) { // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs (and thus the SP isn't reliable as a base). - if (isFixed || (MFI->hasVarSizedObjects() && - !RegInfo->hasBasePointer(MF))) { + if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; - } else if (MFI->hasVarSizedObjects()) { + } else if (hasMovingSP) { assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); if (AFI->isThumb2Function()) { // Try to use the frame pointer if we can, else use the base pointer @@ -541,6 +572,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned StmOpc, unsigned StrOpc, bool NoGap, bool(*Func)(unsigned, bool), + unsigned NumAlignedDPRCS2Regs, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); @@ -554,7 +586,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.isTargetIOS())) continue; + + // D-registers in the aligned area DPRCS2 are NOT spilled here. + if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) + continue; // Add the callee-saved register as live-in unless it's LR and // @llvm.returnaddress is called. If LR is returned for @@ -604,7 +640,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, - bool(*Func)(unsigned, bool)) const { + bool(*Func)(unsigned, bool), + unsigned NumAlignedDPRCS2Regs) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -622,7 +659,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.isTargetIOS())) continue; + + // The aligned reloads from area DPRCS2 are not inserted here. + if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) + continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { Reg = ARM::PC; @@ -676,6 +717,247 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, } } +/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers +/// starting from d8. Also insert stack realignment code and leave the stack +/// pointer pointing to the d8 spill slot. 
+static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + // Mark the D-register spill slots as properly aligned. Since MFI computes + // stack slot layout backwards, this can actually mean that the d-reg stack + // slot offsets can be wrong. The offset for d8 will always be correct. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned DNum = CSI[i].getReg() - ARM::D8; + if (DNum >= 8) + continue; + int FI = CSI[i].getFrameIdx(); + // The even-numbered registers will be 16-byte aligned, the odd-numbered + // registers will be 8-byte aligned. + MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16); + + // The stack slot for D8 needs to be maximally aligned because this is + // actually the point where we align the stack pointer. MachineFrameInfo + // computes all offsets relative to the incoming stack pointer which is a + // bit weird when realigning the stack. Any extra padding for this + // over-alignment is not realized because the code inserted below adjusts + // the stack pointer by numregs * 8 before aligning the stack pointer. + if (DNum == 0) + MFI.setObjectAlignment(FI, MFI.getMaxAlignment()); + } + + // Move the stack pointer to the d8 spill slot, and align it at the same + // time. Leave the stack slot address in the scratch register r4. + // + // sub r4, sp, #numregs * 8 + // bic r4, r4, #align - 1 + // mov sp, r4 + // + bool isThumb = AFI->isThumbFunction(); + assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); + AFI->setShouldRestoreSPFromFP(true); + + // sub r4, sp, #numregs * 8 + // The immediate is <= 64, so it doesn't need any special encoding. + unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs))); + + // bic r4, r4, #align-1 + Opc = isThumb ? ARM::t2BICri : ARM::BICri; + unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(MaxAlign - 1))); + + // mov sp, r4 + // The stack pointer must be adjusted before spilling anything, otherwise + // the stack slots could be clobbered by an interrupt handler. + // Leave r4 live, it is used below. + Opc = isThumb ? ARM::tMOVr : ARM::MOVr; + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP) + .addReg(ARM::R4); + MIB = AddDefaultPred(MIB); + if (!isThumb) + AddDefaultCC(MIB); + + // Now spill NumAlignedDPRCS2Regs registers starting from d8. + // r4 holds the stack slot address. + unsigned NextReg = ARM::D8; + + // 16-byte aligned vst1.64 with 4 d-regs and address writeback. + // The writeback is only needed when emitting two vst1.64 instructions. 
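  // For example (an illustrative sketch assuming all 8 d-regs d8-d15 are
  // spilled and the maximum alignment is 16 bytes), the code emitted by this
  // function is roughly:
  //   sub     r4, sp, #64
  //   bic     r4, r4, #15
  //   mov     sp, r4
  //   vst1.64 {d8, d9, d10, d11}, [r4:128]!
  //   vst1.64 {d12, d13, d14, d15}, [r4:128]
  // Smaller register counts fall through to the two-register vst1.64 and
  // plain vstr.64 cases below.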
+ if (NumAlignedDPRCS2Regs >= 6) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + MBB.addLiveIn(SupReg); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), + ARM::R4) + .addReg(ARM::R4, RegState::Kill).addImm(16) + .addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // We won't modify r4 beyond this point. It currently points to the next + // register to be spilled. + unsigned R4BaseReg = NextReg; + + // 16-byte aligned vst1.64 with 4 d-regs, no writeback. + if (NumAlignedDPRCS2Regs >= 4) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + MBB.addLiveIn(SupReg); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) + .addReg(ARM::R4).addImm(16).addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // 16-byte aligned vst1.64 with 2 d-regs. + if (NumAlignedDPRCS2Regs >= 2) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QPRRegisterClass); + MBB.addLiveIn(SupReg); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) + .addReg(ARM::R4).addImm(16).addReg(SupReg)); + NextReg += 2; + NumAlignedDPRCS2Regs -= 2; + } + + // Finally, use a vanilla vstr.64 for the odd last register. + if (NumAlignedDPRCS2Regs) { + MBB.addLiveIn(NextReg); + // vstr.64 uses addrmode5 which has an offset scale of 4. + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) + .addReg(NextReg) + .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2)); + } + + // The last spill instruction inserted should kill the scratch register r4. + llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI); +} + +/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an +/// iterator to the following instruction. +static MachineBasicBlock::iterator +skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs) { + // sub r4, sp, #numregs * 8 + // bic r4, r4, #align - 1 + // mov sp, r4 + ++MI; ++MI; ++MI; + assert(MI->mayStore() && "Expecting spill instruction"); + + // These switches all fall through. + switch(NumAlignedDPRCS2Regs) { + case 7: + ++MI; + assert(MI->mayStore() && "Expecting spill instruction"); + default: + ++MI; + assert(MI->mayStore() && "Expecting spill instruction"); + case 1: + case 2: + case 4: + assert(MI->killsRegister(ARM::R4) && "Missed kill flag"); + ++MI; + } + return MI; +} + +/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers +/// starting from d8. These instructions are assumed to execute while the +/// stack is still aligned, unlike the code inserted by emitPopInst. +static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + // Find the frame index assigned to d8. + int D8SpillFI = 0; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) + if (CSI[i].getReg() == ARM::D8) { + D8SpillFI = CSI[i].getFrameIdx(); + break; + } + + // Materialize the address of the d8 spill slot into the scratch register r4. + // This can be fairly complicated if the stack frame is large, so just use + // the normal frame index elimination mechanism to do it. 
This code runs as + // the initial part of the epilog where the stack and base pointers haven't + // been changed yet. + bool isThumb = AFI->isThumbFunction(); + assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); + + unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addFrameIndex(D8SpillFI).addImm(0))); + + // Now restore NumAlignedDPRCS2Regs registers starting from d8. + unsigned NextReg = ARM::D8; + + // 16-byte aligned vld1.64 with 4 d-regs and writeback. + if (NumAlignedDPRCS2Regs >= 6) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) + .addReg(ARM::R4, RegState::Define) + .addReg(ARM::R4, RegState::Kill).addImm(16) + .addReg(SupReg, RegState::ImplicitDefine)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // We won't modify r4 beyond this point. It currently points to the next + // register to be spilled. + unsigned R4BaseReg = NextReg; + + // 16-byte aligned vld1.64 with 4 d-regs, no writeback. + if (NumAlignedDPRCS2Regs >= 4) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) + .addReg(ARM::R4).addImm(16) + .addReg(SupReg, RegState::ImplicitDefine)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // 16-byte aligned vld1.64 with 2 d-regs. + if (NumAlignedDPRCS2Regs >= 2) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QPRRegisterClass); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) + .addReg(ARM::R4).addImm(16)); + NextReg += 2; + NumAlignedDPRCS2Regs -= 2; + } + + // Finally, use a vanilla vldr.64 for the remaining odd register. + if (NumAlignedDPRCS2Regs) + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) + .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg))); + + // Last store kills r4. + llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI); +} + bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, @@ -690,12 +972,19 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; unsigned FltOpc = ARM::VSTMDDB_UPD; - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, MachineInstr::FrameSetup); emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, - MachineInstr::FrameSetup); + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + + // The code above does not insert spill code for the aligned DPRCS2 registers. + // The stack realignment code will be inserted between the push instructions + // and these spills. 
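  // The resulting prologue order is therefore roughly (an illustrative sketch
  // for an iOS frame with aligned d-reg spills):
  //   push  {r4, r5, r6, r7, lr}   ; GPR spill area 1
  //   push  {r8, r10, r11}         ; GPR spill area 2
  //   vpush {d...}                 ; any remaining DPRCS1 d-regs
  //   sub/bic/mov sp               ; realignment from emitAlignedDPRCS2Spills
  //   vst1.64 ...                  ; aligned DPRCS2 spills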
+ if (NumAlignedDPRCS2Regs) + emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); return true; } @@ -710,15 +999,22 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); + + // The emitPopInst calls below do not insert reloads for the aligned DPRCS2 + // registers. Do that here instead. + if (NumAlignedDPRCS2Regs) + emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; unsigned FltOpc = ARM::VLDMDIA_UPD; - emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register); + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, + NumAlignedDPRCS2Regs); emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea2Register); + &isARMArea2Register, 0); emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea1Register); + &isARMArea1Register, 0); return true; } @@ -842,6 +1138,55 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, return Limit; } +// In functions that realign the stack, it can be an advantage to spill the +// callee-saved vector registers after realigning the stack. The vst1 and vld1 +// instructions take alignment hints that can improve performance. +// +static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { + MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); + if (!SpillAlignedNEONRegs) + return; + + // Naked functions don't spill callee-saved registers. + if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + return; + + // We are planning to use NEON instructions vst1 / vld1. + if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON()) + return; + + // Don't bother if the default stack alignment is sufficiently high. + if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8) + return; + + // Aligned spills require stack realignment. + const ARMBaseRegisterInfo *RegInfo = + static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + if (!RegInfo->canRealignStack(MF)) + return; + + // We always spill contiguous d-registers starting from d8. Count how many + // needs spilling. The register allocator will almost always use the + // callee-saved registers in order, but it can happen that there are holes in + // the range. Registers above the hole will be spilled to the standard DPRCS + // area. + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned NumSpills = 0; + for (; NumSpills < 8; ++NumSpills) + if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills)) + break; + + // Don't do this for just one d-register. It's not worth it. + if (NumSpills < 2) + return; + + // Spill the first NumSpills D-registers after realigning the stack. + MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills); + + // A scratch register is required for the vst1 / vld1 instructions. + MF.getRegInfo().setPhysRegUsed(ARM::R4); +} + void ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -888,28 +1233,22 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MF.getRegInfo().setPhysRegUsed(ARM::R4); } + // See if we can spill vector registers to aligned stack. 
+ checkNumAlignedDPRCS2Regs(MF); + // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MF.getRegInfo().isPhysRegUsed(Reg)) { + if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { Spilled = true; CanEliminateFrame = false; - } else { - // Check alias registers too. - for (const unsigned *Aliases = - RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) { - if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { - Spilled = true; - CanEliminateFrame = false; - } - } } if (!ARM::GPRRegisterClass->contains(Reg)) @@ -918,7 +1257,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (Spilled) { NumGPRSpills++; - if (!STI.isTargetDarwin()) { + if (!STI.isTargetIOS()) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -938,7 +1277,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, break; } } else { - if (!STI.isTargetDarwin()) { + if (!STI.isTargetIOS()) { UnspilledCS1GPRs.push_back(Reg); continue; } diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 61bb8af..a1c2b93 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -63,12 +63,13 @@ public: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, - bool(*Func)(unsigned, bool), + bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs, unsigned MIFlags = 0) const; void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, - bool(*Func)(unsigned, bool)) const; + bool(*Func)(unsigned, bool), + unsigned NumAlignedDPRCS2Regs) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7473141..c99db98 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -244,6 +244,7 @@ private: /// SelectCMOVOp - Select CMOV instructions for ARM. 
SDNode *SelectCMOVOp(SDNode *N); + SDNode *SelectConditionalOp(SDNode *N); SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -1562,10 +1563,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; - case ARM::VLD1q8PseudoWB_fixed: return ARM::VLD1q8PseudoWB_register; - case ARM::VLD1q16PseudoWB_fixed: return ARM::VLD1q16PseudoWB_register; - case ARM::VLD1q32PseudoWB_fixed: return ARM::VLD1q32PseudoWB_register; - case ARM::VLD1q64PseudoWB_fixed: return ARM::VLD1q64PseudoWB_register; case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; @@ -1575,26 +1572,26 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; - case ARM::VST1q8PseudoWB_fixed: return ARM::VST1q8PseudoWB_register; - case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register; - case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register; - case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register; case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; - case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register; - case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register; - case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register; + case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; + case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; + case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; - case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register; - case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register; - case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register; + case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; + case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; + case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; + + case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register; + case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register; + case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register; } return Opc; // If not one we handle, return it unchanged. } @@ -1668,7 +1665,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. 
- if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) || + if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } @@ -1818,7 +1815,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs != 1 && Opc != ARM::VST1q64PseudoWB_fixed) || + if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } @@ -2043,8 +2040,14 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, Ops.push_back(MemAddr); Ops.push_back(Align); if (isUpdating) { + // fixed-stride update instructions don't have an explicit writeback + // operand. It's implicit in the opcode itself. SDValue Inc = N->getOperand(2); - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + if (!isa<ConstantSDNode>(Inc.getNode())) + Ops.push_back(Inc); + // FIXME: VLD3 and VLD4 haven't been updated to that form yet. + else if (NumVecs > 2) + Ops.push_back(Reg0); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2182,7 +2185,6 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; default: llvm_unreachable("Unknown so_reg opcode!"); - break; } SDValue SOShImm = CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); @@ -2293,9 +2295,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) // Pattern complexity = 18 cost = 1 size = 0 - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; if (Subtarget->isThumb()) { SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); @@ -2352,8 +2351,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { - default: assert(false && "Illegal conditional move type!"); - break; + default: llvm_unreachable("Illegal conditional move type!"); case MVT::i32: Opc = Subtarget->isThumb() ? (Subtarget->hasThumb2() ? 
ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) @@ -2369,6 +2367,115 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } +SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) { + SDValue FalseVal = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + ARMCC::CondCodes CCVal = + (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + SDValue CCR = N->getOperand(3); + assert(CCR.getOpcode() == ISD::Register); + SDValue InFlag = N->getOperand(4); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + + if (Subtarget->isThumb()) { + SDValue CPTmp0; + SDValue CPTmp1; + if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break; + case ARMISD::COR: Opc = ARM::t2ORRCCrs; break; + case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break; + } + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); + } + + ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); + if (T) { + unsigned TrueImm = T->getZExtValue(); + if (is_t2_so_imm(TrueImm)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::t2ANDCCri; break; + case ARMISD::COR: Opc = ARM::t2ORRCCri; break; + case ARMISD::CXOR: Opc = ARM::t2EORCCri; break; + } + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); + } + } + + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break; + case ARMISD::COR: Opc = ARM::t2ORRCCrr; break; + case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break; + } + SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); + } + + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCrsi; break; + case ARMISD::COR: Opc = ARM::ORRCCrsi; break; + case ARMISD::CXOR: Opc = ARM::EORCCrsi; break; + } + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); + } + + if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCrsr; break; + case ARMISD::COR: Opc = ARM::ORRCCrsr; break; + case ARMISD::CXOR: Opc = ARM::EORCCrsr; break; + } + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8); + } + + ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); + if (T) { + unsigned TrueImm = T->getZExtValue(); + if (is_so_imm(TrueImm)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCri; break; + case ARMISD::COR: Opc = ARM::ORRCCri; break; + case ARMISD::CXOR: Opc = ARM::EORCCri; break; + } + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, 
Opc, MVT::i32, Ops, 6); + } + } + + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCrr; break; + case ARMISD::COR: Opc = ARM::ORRCCrr; break; + case ARMISD::CXOR: Opc = ARM::EORCCrr; break; + } + SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); +} + /// Target-specific DAG combining for ISD::XOR. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2706,6 +2813,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); + case ARMISD::CAND: + case ARMISD::COR: + case ARMISD::CXOR: + return SelectConditionalOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); @@ -2798,8 +2909,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP_UPD: { - unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD, - ARM::VLD2DUPd32Pseudo_UPD }; + unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed, + ARM::VLD2DUPd16PseudoWB_fixed, + ARM::VLD2DUPd32PseudoWB_fixed }; return SelectVLDDup(N, true, 2, Opcodes); } @@ -2818,18 +2930,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD1_UPD: { unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed, ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed }; - unsigned QOpcodes[] = { ARM::VLD1q8PseudoWB_fixed, - ARM::VLD1q16PseudoWB_fixed, - ARM::VLD1q32PseudoWB_fixed, - ARM::VLD1q64PseudoWB_fixed }; + unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed, + ARM::VLD1q16wb_fixed, + ARM::VLD1q32wb_fixed, + ARM::VLD1q64wb_fixed }; return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VLD2_UPD: { - unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed, - ARM::VLD2d16PseudoWB_fixed, - ARM::VLD2d32PseudoWB_fixed, - ARM::VLD1q64PseudoWB_fixed}; + unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed, + ARM::VLD2d16wb_fixed, + ARM::VLD2d32wb_fixed, + ARM::VLD1q64wb_fixed}; unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q32PseudoWB_fixed }; @@ -2838,7 +2950,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD3_UPD: { unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD, - ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; + ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q16Pseudo_UPD, ARM::VLD3q32Pseudo_UPD }; @@ -2850,7 +2962,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD4_UPD: { unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, - ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; + ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q16Pseudo_UPD, ARM::VLD4q32Pseudo_UPD }; @@ -2887,18 +2999,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VST1_UPD: { unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed, ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed }; - unsigned QOpcodes[] = { ARM::VST1q8PseudoWB_fixed, - ARM::VST1q16PseudoWB_fixed, - ARM::VST1q32PseudoWB_fixed, - ARM::VST1q64PseudoWB_fixed }; + unsigned QOpcodes[] = { ARM::VST1q8wb_fixed, + ARM::VST1q16wb_fixed, + ARM::VST1q32wb_fixed, + ARM::VST1q64wb_fixed }; return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VST2_UPD: { - unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed, - ARM::VST2d16PseudoWB_fixed, - ARM::VST2d32PseudoWB_fixed, - ARM::VST1q64PseudoWB_fixed}; + 
unsigned DOpcodes[] = { ARM::VST2d8wb_fixed, + ARM::VST2d16wb_fixed, + ARM::VST2d32wb_fixed, + ARM::VST1q64wb_fixed}; unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, ARM::VST2q16PseudoWB_fixed, ARM::VST2q32PseudoWB_fixed }; @@ -3068,14 +3180,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld1: { unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, ARM::VLD1d32, ARM::VLD1d64 }; - unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo, - ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, + ARM::VLD1q32, ARM::VLD1q64}; return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld2: { - unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo, - ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo }; + unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, + ARM::VLD2d32, ARM::VLD1q64 }; unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo }; return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); @@ -3129,14 +3241,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst1: { unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, ARM::VST1d32, ARM::VST1d64 }; - unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, - ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, + ARM::VST1q32, ARM::VST1q64 }; return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { - unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo, - ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; + unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, + ARM::VST2d32, ARM::VST1q64 }; unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, ARM::VST2q32Pseudo }; return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); @@ -3197,14 +3309,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; case Intrinsic::arm_neon_vtbl2: - return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo); + return SelectVTBL(N, false, 2, ARM::VTBL2); case Intrinsic::arm_neon_vtbl3: return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); case Intrinsic::arm_neon_vtbl4: return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); case Intrinsic::arm_neon_vtbx2: - return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo); + return SelectVTBL(N, true, 2, ARM::VTBX2); case Intrinsic::arm_neon_vtbx3: return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); case Intrinsic::arm_neon_vtbx4: @@ -3238,7 +3350,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(2)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT, + return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops.data(), Ops.size()); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c6c1f5b..477b5f4 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -43,7 +43,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" @@ -262,7 +261,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); - if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { // Double-precision floating-point arithmetic 
helper functions // RTABI chapter 4.1.2, Table 2 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); @@ -387,8 +386,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Long long helper functions // RTABI chapter 4.2, Table 9 setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); @@ -404,21 +401,28 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); + setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); + setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); // Memory operations // RTABI chapter 4.3.4 setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); + setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); } // Use divmod compiler-rt calls for iOS 5.0 and later. @@ -529,9 +533,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::v1i64, Expand); setOperationAction(ISD::SETCC, MVT::v2i64, Expand); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with - // a destination type that is wider than the source. + // a destination type that is wider than the source, and nor does + // it have a FP_TO_[SU]INT instruction with a narrower destination than + // source. setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); @@ -551,7 +559,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand); + // It is legal to extload from v4i8 to v4i16 or v4i32. 
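  // Illustrative sketch: with these actions, IR along the lines of
  //   %v = load <4 x i8>* %p
  //   %w = zext <4 x i8> %v to <4 x i32>
  // can be matched as a small vector load followed by vmovl lengthening moves
  // instead of being scalarized.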
+ MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, + MVT::v4i16, MVT::v2i16, + MVT::v2i32}; + for (unsigned i = 0; i < 6; ++i) { + setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal); + setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal); + setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal); + } } computeRegisterProperties(); @@ -643,10 +659,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); + + if (!Subtarget->isTargetDarwin()) { + // Non-Darwin platforms may return values in these registers via the + // personality function. + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setExceptionPointerRegister(ARM::R0); + setExceptionSelectorRegister(ARM::R1); + } setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use @@ -773,10 +794,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); - if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) - setTargetDAGCombine(ISD::OR); - if (Subtarget->hasNEON()) + if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) { setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::XOR); + } + + if (Subtarget->hasV6Ops()) + setTargetDAGCombine(ISD::SRL); setStackPointerRegisterToSaveRestore(ARM::SP); @@ -869,7 +894,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; + case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::CAND: return "ARMISD::CAND"; + case ARMISD::COR: return "ARMISD::COR"; + case ARMISD::CXOR: return "ARMISD::CXOR"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -990,7 +1019,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { /// getRegClassFor - Return the register class that should be used for the /// specified value type. -TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { +const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. @@ -1128,7 +1157,9 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } case CallingConv::ARM_AAPCS_VFP: - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + if (!isVarArg) + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + // Fallthrough case CallingConv::ARM_AAPCS: return (Return ? 
RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: @@ -1255,7 +1286,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SDValue ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -1551,12 +1582,20 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; + else if (doesNotRet && isDirect && !isARMFunc && + Subtarget->hasRAS() && !Subtarget->isThumb1Only()) + // "mov lr, pc; b _foo" to avoid confusing the RSP + CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; } else { - CallOpc = (isDirect || Subtarget->hasV5TOps()) - ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) - : ARMISD::CALL_NOLINK; + if (!isDirect && !Subtarget->hasV5TOps()) { + CallOpc = ARMISD::CALL_NOLINK; + } else if (doesNotRet && isDirect && Subtarget->hasRAS()) + // "mov lr, pc; b _foo" to avoid confusing the RSP + CallOpc = ARMISD::CALL_NOLINK; + else + CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; } std::vector<SDValue> Ops; @@ -1569,6 +1608,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (InFlag.getNode()) Ops.push_back(InFlag); @@ -1897,7 +1942,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { return false; unsigned NumCopies = 0; - SDNode* Copies[2]; + SDNode* Copies[2] = { 0, 0 }; SDNode *Use = *N->use_begin(); if (Use->getOpcode() == ISD::CopyToReg) { Copies[NumCopies++] = Use; @@ -1932,7 +1977,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { UI != UE; ++UI) { if (UI->getOpcode() == ISD::CopyToReg) { SDNode *Use = *UI; - if (Use == Copies[0] || Use == Copies[1]) + if (Use == Copies[0] || ((NumCopies == 2) && (Use == Copies[1]))) continue; return false; } @@ -2043,7 +2088,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, - 0, CallingConv::C, false, /*isReturnValueUsed=*/true, + 0, CallingConv::C, /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -2167,7 +2213,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - // FIXME: Enable this for static codegen when tool issues are fixed. + // FIXME: Enable this for static codegen when tool issues are fixed. Also + // update ARMFastISel::ARMMaterializeGV. 
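  // Illustrative sketch of the movw/movt materialization guarded below, for a
  // hypothetical symbol "sym":
  //   movw rN, :lower16:sym
  //   movt rN, :upper16:sym
  // (the PIC flavour additionally applies a pc-relative correction).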
if (Subtarget->useMovt() && RelocM != Reloc::Static) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register @@ -2398,7 +2445,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = ARM::tGPRRegisterClass; else @@ -2484,7 +2531,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SmallVector<SDValue, 4> MemOps; for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = ARM::tGPRRegisterClass; else @@ -2567,7 +2614,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); } else { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (RegVT == MVT::f32) RC = ARM::SPRRegisterClass; @@ -2809,6 +2856,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the + // undefined bits before doing a full-word comparison with zero. + Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, + DAG.getConstant(1, Cond.getValueType())); + return DAG.getSelectCC(dl, Cond, DAG.getConstant(0, Cond.getValueType()), SelectTrue, SelectFalse, ISD::SETNE); @@ -2926,12 +2978,11 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); DebugLoc dl = Op.getDebugLoc(); - bool SeenZero = false; - if (canChangeToInt(LHS, SeenZero, Subtarget) && - canChangeToInt(RHS, SeenZero, Subtarget) && - // If one of the operand is zero, it's safe to ignore the NaN case since - // we only care about equality comparisons. - (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { + bool LHSSeenZero = false; + bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); + bool RHSSeenZero = false; + bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); + if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { // If unsafe fp math optimization is enabled and there are no other uses of // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. 
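  // A note on the sign-bit mask introduced below (illustrative): +0.0 and
  // -0.0 compare equal as floating point but differ in bit pattern
  // (0x00000000 vs 0x80000000). Because one operand is known to be a zero
  // constant, clearing the sign bit of both sides with 0x7fffffff makes the
  // integer equality test agree with the original FP comparison.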
@@ -2940,10 +2991,13 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { else if (CC == ISD::SETUNE) CC = ISD::SETNE; + SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); SDValue ARMcc; if (LHS.getValueType() == MVT::f32) { - LHS = bitcastf32Toi32(LHS, DAG); - RHS = bitcastf32Toi32(RHS, DAG); + LHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(LHS, DAG), Mask); + RHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, @@ -2954,6 +3008,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue RHS1, RHS2; expandf64Toi32(LHS, DAG, LHS1, LHS2); expandf64Toi32(RHS, DAG, RHS1, RHS2); + LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); + RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); @@ -3047,11 +3103,21 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getVectorElementType() == MVT::i32 && "Unexpected custom lowering"); + DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) - return Op; - return DAG.UnrollVectorOp(Op.getNode()); + if (Op.getValueType().getVectorElementType() == MVT::i32) { + if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) + return Op; + return DAG.UnrollVectorOp(Op.getNode()); + } + + assert(Op.getOperand(0).getValueType() == MVT::v4f32 && + "Invalid type for custom lowering!"); + if (VT != MVT::v4i16) + return DAG.UnrollVectorOp(Op.getNode()); + + Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { @@ -3063,8 +3129,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::FP_TO_SINT: Opc = ARMISD::FTOSI; break; @@ -3094,8 +3159,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { unsigned CastOpc; unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: CastOpc = ISD::SIGN_EXTEND; Opc = ISD::SINT_TO_FP; @@ -3119,8 +3183,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: Opc = ARMISD::SITOF; break; @@ -3494,7 +3557,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (Op.getOperand(1).getValueType().isFloatingPoint()) { switch (SetCCOpcode) { - default: llvm_unreachable("Illegal FP comparison"); break; + default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: case ISD::SETNE: Invert = true; // Fallthrough case ISD::SETOEQ: @@ -3533,7 +3596,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { } else { // Integer comparisons. 
switch (SetCCOpcode) { - default: llvm_unreachable("Illegal integer comparison"); break; + default: llvm_unreachable("Illegal integer comparison"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETLT: Swap = true; @@ -3740,14 +3803,13 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, default: llvm_unreachable("unexpected size for isNEONModifiedImm"); - return SDValue(); } unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); return DAG.getTargetConstant(EncodedVal, MVT::i32); } -static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, +static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; @@ -3786,8 +3848,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, /// isVREVMask - Check if a vector shuffle corresponds to a VREV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) -static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned BlockSize) { +static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); @@ -3813,15 +3874,14 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { +static bool isVTBLMask(ArrayRef<int> M, EVT VT) { // We can handle <8 x i8> vector shuffles. If the index in the mask is out of // range, then 0 is placed into the resulting vector. So pretty much any mask // of 8 elements can work here. return VT == MVT::v8i8 && M.size() == 8; } -static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3839,8 +3899,7 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. -static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3855,8 +3914,7 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3879,8 +3937,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, -static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3904,8 +3961,7 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3930,8 +3986,7 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. -static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -4363,7 +4418,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, } static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, - SmallVectorImpl<int> &ShuffleMask, + ArrayRef<int> ShuffleMask, SelectionDAG &DAG) { // Check to see if we can use the VTBL instruction. SDValue V1 = Op.getOperand(0); @@ -4371,7 +4426,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, DebugLoc DL = Op.getDebugLoc(); SmallVector<SDValue, 8> VTBLMask; - for (SmallVectorImpl<int>::iterator + for (ArrayRef<int>::iterator I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); @@ -4391,7 +4446,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); - SmallVector<int, 8> ShuffleMask; // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again @@ -4399,7 +4453,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // of inconsistencies between legalization and selection. // FIXME: floating-point vectors should be canonicalized to integer vectors // of the same time so that they get CSEd properly. 
- SVN->getMask(ShuffleMask); + ArrayRef<int> ShuffleMask = SVN->getMask(); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); if (EltSize <= 32) { @@ -4959,7 +5013,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { unsigned Opc; bool ExtraOp = false; switch (Op.getOpcode()) { - default: assert(0 && "Invalid code"); + default: llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = ARMISD::ADDC; break; case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; case ISD::SUBC: Opc = ARMISD::SUBC; break; @@ -5071,7 +5125,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); } - return SDValue(); } /// ReplaceNodeResults - Replace the results of node with an illegal result @@ -5083,7 +5136,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); - break; case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; @@ -5279,7 +5331,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); @@ -5389,7 +5441,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = MRI.createVirtualRegister(TRC); @@ -5499,7 +5551,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned storesuccess = MRI.createVirtualRegister(TRC); @@ -5792,7 +5844,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); - BuildMI(DispatchBB, dl, TII->get(ARM::eh_sjlj_dispatchsetup)); + if (AFI->isThumb1OnlyFunction()) + BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); + else if (!Subtarget->hasVFP2()) + BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); + else + BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { @@ -6014,7 +6071,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { // N.B. the order the invoke BBs are processed in doesn't matter here. 
const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); - const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF); + const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; for (SmallPtrSet<MachineBasicBlock*, 64>::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { @@ -6666,7 +6723,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; default: - assert(0 && "Invalid vector element type for padd optimization."); + llvm_unreachable("Invalid vector element type for padd optimization."); } SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), @@ -6818,8 +6875,52 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } +static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) { + if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse()) + return false; + + SDValue FalseVal = N.getOperand(0); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal); + if (!C) + return false; + if (AllOnes) + return C->isAllOnesValue(); + return C->isNullValue(); +} + +/// formConditionalOp - Combine an operation with a conditional move operand +/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y) +/// (and x, (cmov -1, y, cond)) => (and.cond, x, y) +static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG, + bool Commutable) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + bool isAND = N->getOpcode() == ISD::AND; + bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND); + if (!isCand && Commutable) { + isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND); + if (isCand) + std::swap(N0, N1); + } + if (!isCand) + return SDValue(); + + unsigned Opc = 0; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ISD::AND: Opc = ARMISD::CAND; break; + case ISD::OR: Opc = ARMISD::COR; break; + case ISD::XOR: Opc = ARMISD::CXOR; break; + } + return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0, + N1.getOperand(1), N1.getOperand(2), N1.getOperand(3), + N1.getOperand(4)); +} + static SDValue PerformANDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); @@ -6850,6 +6951,13 @@ static SDValue PerformANDCombine(SDNode *N, } } + if (!Subtarget->isThumb1Only()) { + // (and x, (cmov -1, y, cond)) => (and.cond x, y) + SDValue CAND = formConditionalOp(N, DAG, true); + if (CAND.getNode()) + return CAND; + } + return SDValue(); } @@ -6886,6 +6994,13 @@ static SDValue PerformORCombine(SDNode *N, } } + if (!Subtarget->isThumb1Only()) { + // (or x, (cmov 0, y, cond)) => (or.cond x, y) + SDValue COR = formConditionalOp(N, DAG, true); + if (COR.getNode()) + return COR; + } + SDValue N0 = N->getOperand(0); if (N0.getOpcode() != ISD::AND) return SDValue(); @@ -7034,6 +7149,25 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); } +static SDValue PerformXORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + if (!Subtarget->isThumb1Only()) { + // (xor x, 
(cmov 0, y, cond)) => (xor.cond x, y) + SDValue CXOR = formConditionalOp(N, DAG, true); + if (CXOR.getNode()) + return CXOR; + } + + return SDValue(); +} + /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff /// the bits being cleared by the AND are not demanded by the BFI. static SDValue PerformBFICombine(SDNode *N, @@ -7331,7 +7465,7 @@ static SDValue CombineBaseUpdate(SDNode *N, if (isIntrinsic) { unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); switch (IntNo) { - default: assert(0 && "unexpected intrinsic for Neon base update"); + default: llvm_unreachable("unexpected intrinsic for Neon base update"); case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; NumVecs = 1; break; case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; @@ -7364,7 +7498,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } else { isLaneOp = true; switch (N->getOpcode()) { - default: assert(0 && "unexpected opcode for Neon base update"); + default: llvm_unreachable("unexpected opcode for Neon base update"); case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; @@ -7857,6 +7991,18 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); + if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { + // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high + // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. + SDValue N1 = N->getOperand(1); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + SDValue N0 = N->getOperand(0); + if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && + DAG.MaskedValueIsZero(N0.getOperand(0), + APInt::getHighBitsSet(32, 16))) + return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); + } + } // Nothing to be done for scalar shifts. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -8085,7 +8231,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); - case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); + case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); @@ -8377,7 +8524,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, if (Scale & 1) return false; return isPowerOf2_32(Scale); } - break; } return true; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index b8dc4bf..7f12293 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -56,7 +56,11 @@ namespace llvm { CMPFP, // ARM VFP compare instruction, sets FPSCR. CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. FMSTAT, // ARM fmstat instruction. + CMOV, // ARM conditional move instructions. + CAND, // ARM conditional and instructions. + COR, // ARM conditional or instructions. + CXOR, // ARM conditional xor instructions. 
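
To make the new conditional-op combine concrete, here is a hand-written, illustrative ARM sequence (not compiler output, and using NE as a stand-in condition) for the pattern described above, (or x, (cmov 0, y, cond)) => (or.cond x, y). Instead of materialising the cmov result into a temporary and then ORing, the OR itself is predicated, via the ORRCC pseudo added later in this patch:

  @ without the combine: materialise the cmov result, then or
  cmp    r2, #0
  mov    r3, #0
  movne  r3, r1          @ r3 = cond ? y : 0
  orr    r0, r0, r3

  @ with the combine: a single predicated orr (ORRCCrr)
  cmp    r2, #0
  orrne  r0, r0, r1      @ r0 |= r1 only when the condition holds
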
BCC_i64, @@ -345,7 +349,7 @@ namespace llvm { /// getRegClassFor - Return the register class that should be used for the /// specified value type. - virtual TargetRegisterClass *getRegClassFor(EVT VT) const; + virtual const TargetRegisterClass *getRegClassFor(EVT VT) const; /// getMaximalGlobalOffset - Returns the maximal possible offset which can /// be used for loads / stores from the global. @@ -458,7 +462,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 80f3773..1d38bcf 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1,4 +1,4 @@ -//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=// +//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -290,6 +290,14 @@ class InstTemplate<AddrMode am, int sz, IndexMode im, class Encoding { field bits<32> Inst; + // Mask of bits that cause an encoding to be UNPREDICTABLE. + // If a bit is set, then if the corresponding bit in the + // target encoding differs from its value in the "Inst" field, + // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). + field bits<32> Unpredictable = 0; + // SoftFail is the generic name for this field, but we alias it so + // as to make it more obvious what it means in ARM-land. + field bits<32> SoftFail = Unpredictable; } class InstARM<AddrMode am, int sz, IndexMode im, @@ -1594,8 +1602,11 @@ class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> { + bits<5> fbits; // size (fixed-point number): sx == 0 ? 16 : 32 let Inst{7} = op5; // sx + let Inst{5} = fbits{0}; + let Inst{3-0} = fbits{4-1}; } // VFP conversion instructions, if no NEON @@ -2019,6 +2030,15 @@ multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; } +multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> { + let Predicates = [HasNEON] in { + def : VFPDataTypeInstAlias<opc, ".8", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".16", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".32", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; +} +} + // The same alias classes using AsmPseudo instead, for the more complex // stuff in NEON that InstAlias can't quite handle. // Note that we can't use anonymous defm references here like we can @@ -2026,75 +2046,6 @@ multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { // for instalias defs. 
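
The Unpredictable/SoftFail mask added above is consumed by the disassembler: an encoding that matches the required bits but differs in a masked bit is still decoded, just flagged as UNPREDICTABLE rather than rejected. A minimal, self-contained C++ sketch of that three-way classification follows; it is illustrative only (the real check lives in the TableGen-generated decoder), the names are made up, and operand fields are ignored for brevity:

  #include <cstdint>

  enum class DecodeStatus { Fail, SoftFail, Success };

  // 'Inst' holds the canonical bit values; 'SoftFailMask' marks bits whose
  // mismatch makes the instruction UNPREDICTABLE rather than invalid.
  DecodeStatus classify(uint32_t Encoding, uint32_t Inst, uint32_t SoftFailMask) {
    uint32_t Diff = Encoding ^ Inst;
    if (Diff & ~SoftFailMask)
      return DecodeStatus::Fail;      // a required bit differs: not this instruction
    if (Diff & SoftFailMask)
      return DecodeStatus::SoftFail;  // decodes, but the ARM ARM calls it UNPREDICTABLE
    return DecodeStatus::Success;
  }
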
class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> : AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>; -multiclass NEONDT8ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I8 : NEONDataTypeAsmPseudoInst<opc, ".i8", asm, iops>; - def S8 : NEONDataTypeAsmPseudoInst<opc, ".s8", asm, iops>; - def U8 : NEONDataTypeAsmPseudoInst<opc, ".u8", asm, iops>; - def P8 : NEONDataTypeAsmPseudoInst<opc, ".p8", asm, iops>; -} -// NEONDT8ReqAsmPseudoInst plus plain ".8" -multiclass NEONDT8AsmPseudoInst<string opc, string asm, dag iops> { - def _8 : NEONDataTypeAsmPseudoInst<opc, ".8", asm, iops>; - defm _ : NEONDT8ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDT16ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I16 : NEONDataTypeAsmPseudoInst<opc, ".i16", asm, iops>; - def S16 : NEONDataTypeAsmPseudoInst<opc, ".s16", asm, iops>; - def U16 : NEONDataTypeAsmPseudoInst<opc, ".u16", asm, iops>; - def P16 : NEONDataTypeAsmPseudoInst<opc, ".p16", asm, iops>; -} -// NEONDT16ReqAsmPseudoInst plus plain ".16" -multiclass NEONDT16AsmPseudoInst<string opc, string asm, dag iops> { - def _16 : NEONDataTypeAsmPseudoInst<opc, ".16", asm, iops>; - defm _ : NEONDT16ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDT32ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I32 : NEONDataTypeAsmPseudoInst<opc, ".i32", asm, iops>; - def S32 : NEONDataTypeAsmPseudoInst<opc, ".s32", asm, iops>; - def U32 : NEONDataTypeAsmPseudoInst<opc, ".u32", asm, iops>; - def F32 : NEONDataTypeAsmPseudoInst<opc, ".f32", asm, iops>; - def F : NEONDataTypeAsmPseudoInst<opc, ".f", asm, iops>; -} -// NEONDT32ReqAsmPseudoInst plus plain ".32" -multiclass NEONDT32AsmPseudoInst<string opc, string asm, dag iops> { - def _32 : NEONDataTypeAsmPseudoInst<opc, ".32", asm, iops>; - defm _ : NEONDT32ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDT64ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; - def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; - def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; - def F64 : NEONDataTypeAsmPseudoInst<opc, ".f64", asm, iops>; - def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; -} -// NEONDT64ReqAsmPseudoInst plus plain ".64" -multiclass NEONDT64AsmPseudoInst<string opc, string asm, dag iops> { - def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; - defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDT64NoF64ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; - def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; - def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; - def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; -} -// NEONDT64ReqAsmPseudoInst plus plain ".64" -multiclass NEONDT64NoF64AsmPseudoInst<string opc, string asm, dag iops> { - def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; - defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDTAnyAsmPseudoInst<string opc, string asm, dag iops> { - defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT64AsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDTAnyNoF64AsmPseudoInst<string opc, string asm, dag iops> { - defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; - defm _ : 
NEONDT32AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT64NoF64AsmPseudoInst<opc, asm, iops>; -} // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. def : TokenAlias<".s8", ".i8">; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 48da03f..b8f607e 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +//===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -21,12 +21,29 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + if (hasNOP()) { + NopInst.setOpcode(ARM::NOP); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); + } else { + NopInst.setOpcode(ARM::MOVr); + NopInst.addOperand(MCOperand::CreateReg(ARM::R0)); + NopInst.addOperand(MCOperand::CreateReg(ARM::R0)); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); + NopInst.addOperand(MCOperand::CreateReg(0)); + } +} + unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { switch (Opc) { default: break; diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index f2c7bdc..7bedf30 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -1,4 +1,4 @@ -//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===// +//===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -28,6 +28,9 @@ class ARMInstrInfo : public ARMBaseInstrInfo { public: explicit ARMInstrInfo(const ARMSubtarget &STI); + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + void getNoopForMachoTarget(MCInst &NopInst) const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. 
unsigned getUnindexedOpcode(unsigned Opc) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 516a080..0b1406e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -179,8 +179,14 @@ def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3">; +def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, + AssemblerPredicate<"FeatureVFP4">; +def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON">; +def HasNEON2 : Predicate<"Subtarget->hasNEON2()">, + AssemblerPredicate<"FeatureNEON2">; +def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -206,8 +212,8 @@ def IsARClass : Predicate<"!Subtarget->isMClass()">, AssemblerPredicate<"!FeatureMClass">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb">; -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; +def IsIOS : Predicate<"Subtarget->isTargetIOS()">; +def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; // FIXME: Eventually this will be just "hasV6T2Ops". @@ -343,13 +349,11 @@ def bltarget : Operand<i32> { // Call target for ARM. Handles conditional/unconditional // FIXME: rename bl_target to t2_bltarget? def bl_target : Operand<i32> { - // Encoded the same as branch targets. - let EncoderMethod = "getARMBranchTargetOpValue"; + let EncoderMethod = "getARMBLTargetOpValue"; let OperandType = "OPERAND_PCREL"; } def blx_target : Operand<i32> { - // Encoded the same as branch targets. let EncoderMethod = "getARMBLXTargetOpValue"; let OperandType = "OPERAND_PCREL"; } @@ -760,7 +764,7 @@ def am2offset_reg : Operand<i32>, let PrintMethod = "printAddrMode2OffsetOperand"; // When using this for assembly, it's always as a post-index offset. let ParserMatchClass = PostIdxRegShiftedAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } // FIXME: am2offset_imm should only need the immediate, not the GPR. Having @@ -772,7 +776,7 @@ def am2offset_imm : Operand<i32>, let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; let ParserMatchClass = AM2OffsetImmAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } @@ -1892,20 +1896,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { } } -// All calls clobber the non-callee saved registers. SP is marked as -// a use to prevent stack-pointer assignments that appear immediately -// before calls from potentially appearing dead. +// SP is marked as a use to prevent stack-pointer assignments that appear +// immediately before calls from potentially appearing dead. let isCall = 1, - // On non-Darwin platforms R9 is callee-saved. // FIXME: Do we really need a non-predicated version? If so, it should // at least be a pseudo instruction expanding to the predicated version // at MC lowering time. 
- Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [SP] in { + Defs = [LR], Uses = [SP] in { def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM, IsNotDarwin]> { + Requires<[IsARM, IsNotIOS]> { let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; @@ -1915,7 +1916,7 @@ let isCall = 1, def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM, IsNotDarwin]> { + Requires<[IsARM, IsNotIOS]> { bits<24> func; let Inst{23-0} = func; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -1925,7 +1926,7 @@ let isCall = 1, def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotDarwin]> { + Requires<[IsARM, HasV5T, IsNotIOS]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; @@ -1934,7 +1935,7 @@ let isCall = 1, def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotDarwin]> { + Requires<[IsARM, HasV5T, IsNotIOS]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; @@ -1944,55 +1945,67 @@ let isCall = 1, // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsNotDarwin]>; + Requires<[IsARM, HasV4T, IsNotIOS]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsNotDarwin]>; + Requires<[IsARM, NoV4T, IsNotIOS]>; + + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. + def BMOVPCB_CALL : ARMPseudoInst<(outs), + (ins bl_target:$func, variable_ops), + 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsARM, IsNotIOS]>; } let isCall = 1, - // On Darwin R9 is call-clobbered. + // On IOS R9 is call-clobbered. // R7 is marked as a use to prevent frame-pointer assignments from being // moved above / below calls. - Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [R7, SP] in { + Defs = [LR], Uses = [R7, SP] in { def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops), 4, IIC_Br, [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; def BLr9_pred : ARMPseudoExpand<(outs), (ins bl_target:$func, pred:$p, variable_ops), 4, IIC_Br, [(ARMcall_pred tglobaladdr:$func)], (BL_pred bl_target:$func, pred:$p)>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; // ARMv5T and above def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops), 4, IIC_Br, [(ARMcall GPR:$func)], (BLX GPR:$func)>, - Requires<[IsARM, HasV5T, IsDarwin]>; + Requires<[IsARM, HasV5T, IsIOS]>; def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops), 4, IIC_Br, [(ARMcall_pred GPR:$func)], (BLX_pred GPR:$func, pred:$p)>, - Requires<[IsARM, HasV5T, IsDarwin]>; + Requires<[IsARM, HasV5T, IsIOS]>; // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. 
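
The expansion the comment above describes relies on a well-known ARM-state property: reading PC yields the address of the current instruction plus 8, so a `mov lr, pc` placed immediately before a branch leaves LR pointing at the instruction after that branch. An illustrative, hand-written expansion of the BMOVPCB_CALL pseudo (`callee` is a placeholder label):

  mov  lr, pc        @ lr := '.' + 8, i.e. the address just past the branch below
  b    callee        @ plain branch: unlike bl, this typically pushes no entry on
                     @ the return address stack, so a callee that never returns
                     @ cannot leave the predictor out of sync
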
def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsDarwin]>; + Requires<[IsARM, HasV4T, IsIOS]>; // ARMv4 def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsDarwin]>; + Requires<[IsARM, NoV4T, IsIOS]>; + + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. + def BMOVPCBr9_CALL : ARMPseudoInst<(outs),(ins bl_target:$func, variable_ops), + 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsARM, IsIOS]>; } let isBranch = 1, isTerminator = 1 in { @@ -2060,45 +2073,43 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin versions. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { + // IOS versions. + let Uses = [SP] in { def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsDarwin]>; + IIC_Br, []>, Requires<[IsIOS]>; def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsDarwin]>; + IIC_Br, []>, Requires<[IsIOS]>; def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops), 4, IIC_Br, [], (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (BX GPR:$dst)>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; } - // Non-Darwin versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { + // Non-IOS versions (the difference is R9). 
+ let Uses = [SP] in { def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotDarwin]>; + IIC_Br, []>, Requires<[IsNotIOS]>; def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotDarwin]>; + IIC_Br, []>, Requires<[IsNotIOS]>; def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops), 4, IIC_Br, [], (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsNotDarwin]>; + Requires<[IsARM, IsNotIOS]>; def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (BX GPR:$dst)>, - Requires<[IsARM, IsNotDarwin]>; + Requires<[IsARM, IsNotIOS]>; } } @@ -4072,6 +4083,73 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), 4, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + +let isCodeGenOnly = 1 in { +// Conditional instructions +multiclass AsI1_bincc_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + iii, opc, "\t$Rd, $Rn, $imm", []>, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; + } + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + iir, opc, "\t$Rd, $Rn, $Rm", []>, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + } + + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", []>, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, + iis, opc, "\t$Rd, $Rn, $shift", []>, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } +} // AsI1_bincc_irs + +defm ANDCC : AsI1_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm ORRCC : AsI1_bincc_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm EORCC : AsI1_bincc_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; + +} // isCodeGenOnly } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -4152,10 +4230,10 @@ let usesCustomInserter = 1 in { [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMIN_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMAX_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; @@ -4182,10 +4260,10 @@ let 
usesCustomInserter = 1 in { [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMIN_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMAX_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; @@ -4212,10 +4290,10 @@ let usesCustomInserter = 1 in { [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMIN_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMAX_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>; def ATOMIC_SWAP_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, @@ -4261,14 +4339,14 @@ def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; -} - -let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in +let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr), NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> { let DecoderMethod = "DecodeDoubleRegStore"; } +} + def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, Requires<[IsARM, HasV7]> { @@ -4711,8 +4789,8 @@ let isCall = 1, // no encoding information is necessary. let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1, - usesCustomInserter = 1 in { + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], + hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), NoItinerary, [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, @@ -4721,28 +4799,37 @@ let Defs = let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], - hasSideEffects = 1, isBarrier = 1 in { + hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), NoItinerary, [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } -// FIXME: Non-Darwin version(s) +// FIXME: Non-IOS version(s) let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, Defs = [ R7, LR, SP ] in { def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), NoItinerary, [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; } -// eh.sjlj.dispatchsetup pseudo-instruction. -// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are +// eh.sjlj.dispatchsetup pseudo-instructions. +// These pseudos are used for both ARM and Thumb2. 
Any differences are // handled when the pseudo is expanded (which happens before any passes // that need the instruction size). -let isBarrier = 1 in -def eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], + isBarrier = 1 in +def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; + +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], + isBarrier = 1 in +def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -4801,28 +4888,34 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Tail calls def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>; + (TCRETURNri tcGPR:$dst)>, Requires<[IsIOS]>; def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>; + (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotIOS]>; def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; // Direct calls def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, - Requires<[IsARM, IsNotDarwin]>; + Requires<[IsARM, IsNotIOS]>; def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; +def : ARMPat<(ARMcall_nolink texternalsym:$func), + (BMOVPCB_CALL texternalsym:$func)>, + Requires<[IsARM, IsNotIOS]>; +def : ARMPat<(ARMcall_nolink texternalsym:$func), + (BMOVPCBr9_CALL texternalsym:$func)>, + Requires<[IsARM, IsIOS]>; // zextload i1 -> zextload i8 def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; @@ -5158,3 +5251,7 @@ def : ARMInstAlias<"mul${s}${p} $Rn, $Rm", // "neg" is and alias for "rsb rd, rn, #0" def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; + +// 'it' blocks in ARM mode just validate the predicates. The IT itself +// is discarded. 
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c40860d..8684ce1 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1,4 +1,4 @@ -//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===// +//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -39,6 +39,11 @@ def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } +def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } +def nImmVMOVI32Neg : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32NegAsmOperand; +} def nImmVMOVF32 : Operand<i32> { let PrintMethod = "printFPImmOperand"; let ParserMatchClass = FPImmOperand; @@ -84,13 +89,13 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { let ParserMatchClass = VecListOneDAsmOperand; } // Register list of two sequential D registers. -def VecListTwoDAsmOperand : AsmOperandClass { - let Name = "VecListTwoD"; +def VecListDPairAsmOperand : AsmOperandClass { + let Name = "VecListDPair"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { - let ParserMatchClass = VecListTwoDAsmOperand; +def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> { + let ParserMatchClass = VecListDPairAsmOperand; } // Register list of three sequential D registers. def VecListThreeDAsmOperand : AsmOperandClass { @@ -111,13 +116,31 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { let ParserMatchClass = VecListFourDAsmOperand; } // Register list of two D registers spaced by 2 (two sequential Q registers). -def VecListTwoQAsmOperand : AsmOperandClass { - let Name = "VecListTwoQ"; +def VecListDPairSpacedAsmOperand : AsmOperandClass { + let Name = "VecListDPairSpaced"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListDPairSpaced"> { + let ParserMatchClass = VecListDPairSpacedAsmOperand; +} +// Register list of three D registers spaced by 2 (three Q registers). +def VecListThreeQAsmOperand : AsmOperandClass { + let Name = "VecListThreeQ"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> { + let ParserMatchClass = VecListThreeQAsmOperand; +} +// Register list of three D registers spaced by 2 (three Q registers). +def VecListFourQAsmOperand : AsmOperandClass { + let Name = "VecListFourQ"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> { - let ParserMatchClass = VecListTwoQAsmOperand; +def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> { + let ParserMatchClass = VecListFourQAsmOperand; } // Register list of one D register, with "all lanes" subscripting. @@ -138,6 +161,56 @@ def VecListTwoDAllLanesAsmOperand : AsmOperandClass { def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> { let ParserMatchClass = VecListTwoDAllLanesAsmOperand; } +// Register list of two D registers spaced by 2 (two sequential Q registers). 
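
The switch from VecListTwoD (two separate DPR operands backed by pseudo-instructions) to VecListDPair, backed by the DPair register class, means a two-register list such as the one below is carried as a single machine operand. That is evidently what allows the VLD1qNPseudo / VST1qNPseudo and VLD2dNPseudo definitions to be deleted further down in this patch: codegen can use the real instructions directly. For example (assembly syntax only):

  vld1.32 {d16, d17}, [r0]   @ the {d16, d17} list is one DPair operand
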
+def VecListTwoQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListTwoQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListTwoQAllLanes : RegisterOperand<DPR, + "printVectorListTwoSpacedAllLanes"> { + let ParserMatchClass = VecListTwoQAllLanesAsmOperand; +} +// Register list of three D registers, with "all lanes" subscripting. +def VecListThreeDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeDAllLanes : RegisterOperand<DPR, + "printVectorListThreeAllLanes"> { + let ParserMatchClass = VecListThreeDAllLanesAsmOperand; +} +// Register list of three D registers spaced by 2 (three sequential Q regs). +def VecListThreeQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQAllLanes : RegisterOperand<DPR, + "printVectorListThreeSpacedAllLanes"> { + let ParserMatchClass = VecListThreeQAllLanesAsmOperand; +} +// Register list of four D registers, with "all lanes" subscripting. +def VecListFourDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> { + let ParserMatchClass = VecListFourDAllLanesAsmOperand; +} +// Register list of four D registers spaced by 2 (four sequential Q regs). +def VecListFourQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourQAllLanes : RegisterOperand<DPR, + "printVectorListFourSpacedAllLanes"> { + let ParserMatchClass = VecListFourQAllLanesAsmOperand; +} + // Register list of one D register, with byte lane subscripting. def VecListOneDByteIndexAsmOperand : AsmOperandClass { @@ -169,7 +242,8 @@ def VecListOneDWordIndexed : Operand<i32> { let ParserMatchClass = VecListOneDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } -// Register list of two D registers, with byte lane subscripting. + +// Register list of two D registers with byte lane subscripting. def VecListTwoDByteIndexAsmOperand : AsmOperandClass { let Name = "VecListTwoDByteIndexed"; let ParserMethod = "parseVectorList"; @@ -199,6 +273,130 @@ def VecListTwoDWordIndexed : Operand<i32> { let ParserMatchClass = VecListTwoDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +// Register list of two Q registers with half-word lane subscripting. +def VecListTwoQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListTwoQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + + +// Register list of three D registers with byte lane subscripting. 
+def VecListThreeDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of three Q registers with half-word lane subscripting. +def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + +// Register list of four D registers with byte lane subscripting. +def VecListFourDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListFourDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four Q registers with half-word lane subscripting. 
+def VecListFourQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -395,7 +593,7 @@ class VLD1D<bits<4> op7_4, string Dt> let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd), + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins addrmode6:$Rn), IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; @@ -413,11 +611,6 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; -def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>; -def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>; -def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>; -def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>; - // ...with address register writeback: multiclass VLD1DWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), @@ -439,7 +632,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> { } } multiclass VLD1QWB<bits<4> op7_4, string Dt> { - def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { @@ -448,7 +641,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } - def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -467,15 +660,6 @@ defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; -def VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; -def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; -def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; -def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; -def VLD1q8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; -def VLD1q16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; -def VLD1q32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; -def VLD1q64PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; - // ...with 3 registers class VLD1D3<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), @@ -569,18 +753,14 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; -def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; -def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; +def 
VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; -def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; -def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; -def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>; - def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; @@ -607,21 +787,14 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, } } -defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; -defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; -defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>; defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; -def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; -def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; -def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; -def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; -def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; -def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; - def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; @@ -630,12 +803,12 @@ def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; // ...with double-spaced registers -def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>; -def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>; -def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>; -defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>; -defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>; -defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -819,12 +992,11 @@ def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { } def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{4}; + let Inst{5-4} = Rn{5-4}; } def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { let Inst{7} = lane{0}; - let Inst{5} = Rn{4}; - let Inst{4} 
= Rn{4}; + let Inst{5-4} = Rn{5-4}; } def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; @@ -994,7 +1166,7 @@ def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; } def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; @@ -1005,7 +1177,7 @@ def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; } def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; @@ -1020,7 +1192,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { let Rm = 0b1111; - let Inst{4} = Rn{4}; + let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4LN"; } @@ -1031,7 +1203,7 @@ def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1044,7 +1216,7 @@ def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1072,7 +1244,7 @@ def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1084,7 +1256,7 @@ def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1197,48 +1369,65 @@ def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) -class VLD2DUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2), +class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> + : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), (ins addrmode6dup:$Rn), IIC_VLD2dup, - "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>; def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>; def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>; def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>; // ...with double-spaced registers (not used for codegen): -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">; +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>; // ...with address register writeback: -class VLD2DUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins 
addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu, - "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2DupInstruction"; +multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { + def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">; -def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">; -def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>; -def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">; -def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">; -def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>; -def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; -def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; -def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; +def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; +def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; +def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; +def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; +def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; +def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP<bits<4> op7_4, string Dt> @@ -1259,9 +1448,9 @@ def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>; def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>; // ...with double-spaced registers (not used for codegen): -def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">; -def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">; -def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">; +def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; +def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; +def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD3DUPWB<bits<4> op7_4, string Dt> @@ -1277,9 +1466,9 @@ def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; -def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; @@ -1305,9 +1494,9 @@ def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>; def VLD4DUPd32Pseudo : 
VLDQQPseudo<IIC_VLD4dup>; // ...with double-spaced registers (not used for codegen): -def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">; -def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">; -def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; +def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; +def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } // ...with address register writeback: class VLD4DUPWB<bits<4> op7_4, string Dt> @@ -1324,9 +1513,9 @@ def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">; -def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">; -def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; +def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; +def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; @@ -1358,6 +1547,15 @@ class VSTQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, "$addr.addr = $wb">; +class VSTQQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, QQPR:$src), itin, + "$addr.addr = $wb">; +class VSTQQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, + "$addr.addr = $wb">; + class VSTQQQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; class VSTQQQQWBPseudo<InstrItinClass itin> @@ -1374,7 +1572,7 @@ class VST1D<bits<4> op7_4, string Dt> let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; @@ -1391,11 +1589,6 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">; def VST1q32 : VST1Q<{1,0,?,?}, "32">; def VST1q64 : VST1Q<{1,1,?,?}, "64">; -def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>; -def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>; -def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>; -def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>; - // ...with address register writeback: multiclass VST1DWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), @@ -1419,7 +1612,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { } multiclass VST1QWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VLD1x2u, + (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
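
The recurring `Rm = 0b1111` / `Rm = 0b1101` assignments in these NEON load/store classes follow the ARM encoding convention for element load/store addressing: Rm = 0b1111 means no writeback, Rm = 0b1101 means post-increment by the transfer size (the `!` syntax handled by the _fixed variants), and any other Rm is a post-increment by that register (the _register variants). For example:

  vst1.8  {d0, d1}, [r0]        @ Rm = 0b1111: no writeback
  vst1.8  {d0, d1}, [r0]!       @ Rm = 0b1101: r0 += 16 (the transfer size)
  vst1.8  {d0, d1}, [r0], r2    @ Rm = r2:     r0 += r2
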
@@ -1428,7 +1621,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd), + (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1448,15 +1641,6 @@ defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; -def VST1q8PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; -def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; -def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; -def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; -def VST1q8PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; -def VST1q16PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; -def VST1q32PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; -def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; - // ...with 3 registers class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), @@ -1556,18 +1740,14 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, let DecoderMethod = "DecodeVSTInstruction"; } -def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; -def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; -def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; -def VST2d8Pseudo : VSTQPseudo<IIC_VST2>; -def VST2d16Pseudo : VSTQPseudo<IIC_VST2>; -def VST2d32Pseudo : VSTQPseudo<IIC_VST2>; - def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; @@ -1614,35 +1794,28 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { } } -defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; -defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; -defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; -def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; -def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; -def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; -def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; -def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; - -def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; +def 
VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; // ...with double-spaced registers -def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; -def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; -def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; -defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -3837,10 +4010,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -3895,10 +4068,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -3947,6 +4120,24 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; + +// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. +def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON2,FPContractions]>; + +def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", + v4f32, fmul_su, fadd_mlx>, + Requires<[HasNEON2,FPContractions]>; + +// Fused Vector Multiply Subtract (floating-point) +def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON2,FPContractions]>; +def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", + v4f32, fmul_su, fsub_mlx>, + Requires<[HasNEON2,FPContractions]>; + // Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point) @@ -4628,11 +4819,13 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs DPR:$Vd, DPR:$Vd1), (ins DPR:$Vm, DPR:$Vm1), + NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs QPR:$Vd, QPR:$Vd1), (ins QPR:$Vm, QPR:$Vm1), + NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + []>; // Vector Move Operations. @@ -4964,6 +5157,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, // VMOVL : Vector Lengthening Move defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; +def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; +def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; +def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; // Vector Conversions. @@ -5198,7 +5394,7 @@ def VTBL1 let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), @@ -5211,8 +5407,6 @@ def VTBL4 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBL2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>; def VTBL3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; def VTBL4Pseudo @@ -5228,7 +5422,7 @@ def VTBX1 let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), @@ -5243,9 +5437,6 @@ def VTBX4 "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBX2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src), - IIC_VTBX2, "$orig = $dst", []>; def VTBX3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), IIC_VTBX3, "$orig = $dst", []>; @@ -5295,9 +5486,13 @@ def : N3VSPat<fadd, VADDfd>; def : N3VSPat<fsub, VSUBfd>; def : N3VSPat<fmul, VMULfd>; def : N3VSMulOpPat<fmul, fadd, VMLAfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; def : N3VSMulOpPat<fmul, fsub, VMLSfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; +def : N3VSMulOpPat<fmul, fadd, VFMAfd>, + Requires<[HasNEON2, UseNEONForFP,FPContractions]>; +def : N3VSMulOpPat<fmul, fsub, VFMSfd>, + Requires<[HasNEON2, UseNEONForFP,FPContractions]>; def : N2VSPat<fabs, VABSfd>; def : N2VSPat<fneg, VNEGfd>; def : N3VSPat<NEONfmax, VMAXfd>; @@ -5374,6 +5569,117 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +// Vector lengthening move with load, matching extending loads. + +// extload, zextload and sextload for a standard lengthening load. 
Example: +// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr)) +// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>; +multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) + (VLDRD addrmode5:$addr))>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) + (VLDRD addrmode5:$addr))>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy) + (VLDRD addrmode5:$addr))>; +} + +// extload, zextload and sextload for a lengthening load which only uses +// half the lanes available. Example: +// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = +// Pat<(v4i16 (extloadvi8 addrmode5:$addr)) +// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), +// (VLDRS addrmode5:$addr), +// ssub_0)), +// dsub_0)>; +multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, + string InsnLanes, string InsnTy> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening load, to quadruple the initial length. 
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> = +// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), +// (VLDRS addrmode5:$addr), +// ssub_0)), +// dsub_0)), +// qsub_0)>; +multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty, SubRegIndex RegType> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0)), + RegType)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0)), + RegType)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0)), + RegType)>; +} + +defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16 +defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32 +defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64 + +defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 +defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16 +defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 + +// Double lengthening - v4i8 -> v4i16 -> v4i32 +defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>; +// v2i8 -> v2i16 -> v2i32 +defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>; +// v2i16 -> v2i32 -> v2i64 +defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>; + +// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 +def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)), dsub_0))>; +def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)), dsub_0))>; +def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)), dsub_0))>; //===----------------------------------------------------------------------===// // Assembler aliases @@ -5448,21 +5754,21 @@ def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; // VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
-defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; // ... two-operand aliases def : NEONInstAlias<"vand${p} $Vdn, $Vm", @@ -5482,17 +5788,17 @@ def : NEONInstAlias<"vorr${p} $Vdn, $Vm", def : NEONInstAlias<"vorr${p} $Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; // VMUL two-operand aliases. @@ -5668,122 +5974,724 @@ def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; // VST1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; // VLD2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; // VST2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
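// Roughly, the all-lanes (duplicating) forms handled here look like the
// following (arbitrary registers; the second line is the double-spaced "q" list):
//   vld3.8 {d0[], d1[], d2[]}, [r0]
//   vld3.8 {d0[], d2[], d4[]}, [r0]!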
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD3DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VLD3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
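// For reference, the multiple-structure forms these stand in for are along
// the lines of (arbitrary registers; the second line uses the double-spaced list):
//   vld3.8  {d0, d1, d2}, [r0]
//   vld3.16 {d0, d2, d4}, [r0], r1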
+def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VST3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VST3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD4 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
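// Sketch of the duplicating four-register forms covered here (registers are
// arbitrary; the second list is the double-spaced "q" variant):
//   vld4.16 {d0[], d1[], d2[], d3[]}, [r0]
//   vld4.32 {d0[], d2[], d4[], d6[]}, [r0]!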
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD4DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VLD4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + + +// VLD4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
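// Illustrative four-register multiple-structure forms (registers arbitrary;
// the second line shows the double-spaced list with writeback):
//   vld4.8  {d0, d1, d2, d3}, [r0]
//   vld4.32 {d0, d2, d4, d6}, [r0]!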
+def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VST4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + +def VST4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; // VMOV takes an optional datatype suffix -defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. @@ -5878,6 +6786,185 @@ def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +// Two-operand variants for VMAX. 
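// In these two-operand aliases the destination doubles as the first source,
// so, for example (register choices arbitrary):
//   vmax.s8  d0, d1    is accepted as    vmax.s8  d0, d0, d1
//   vmax.f32 q1, q2    is accepted as    vmax.f32 q1, q1, q2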
+def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", + (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", + (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", + (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", + (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", + (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", + (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", + (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", + (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", + (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", + (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", + (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", + (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", + (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", + (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VMIN. +def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", + (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", + (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", + (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", + (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", + (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", + (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", + (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", + (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", + (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", + (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", + (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", + (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", + (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", + (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VPADD. +def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm", + (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm", + (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm", + (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm", + (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +// Two-operand variants for VSRA. + // Signed. 
+def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", + (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", + (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", + (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", + (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", + (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", + (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", + (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", + (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + + // Unsigned. +def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", + (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", + (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", + (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", + (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", + (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", + (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", + (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", + (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +// Two-operand variants for VSRI. +def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", + (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", + (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", + (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", + (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", + (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", + (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", + (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", + (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +// Two-operand variants for VSLI. 
+def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", + (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", + (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", + (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", + (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", + (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", + (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", + (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", + (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +// VSWP allows, but does not require, a type suffix. +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; + +// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; + +// "vmov Rd, #-imm" can be handled via "vmvn". +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; + // 'gas' compatibility aliases for quad-word instructions. Strictly speaking, // these should restrict to just the Q register variants, but the register // classes are enough to match correctly regardless, so we keep it simple @@ -5911,3 +6998,18 @@ def : NEONMnemonicAlias<"vcvtq", "vcvt">; def : NEONMnemonicAlias<"vcleq", "vcle">; def : NEONMnemonicAlias<"vceqq", "vceq">; + +def : NEONMnemonicAlias<"vzipq", "vzip">; +def : NEONMnemonicAlias<"vswpq", "vswp">; + +def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; +def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; + + +// Alias for loading floating point immediates that aren't representable +// using the vmov.f32 encoding but the bitpattern is representable using +// the .i32 encoding. 
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index ac1a229..ba1791b 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1,4 +1,4 @@ -//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===// +//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -387,6 +387,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { bits<4> Rm; let Inst{6-3} = Rm; let Inst{2-0} = 0b000; + let Unpredictable{2-0} = 0b111; } } @@ -404,15 +405,14 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // prevent stack-pointer assignments that appear immediately before calls from // potentially appearing dead. let isCall = 1, - // On non-Darwin platforms R9 is callee-saved. - Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [SP] in { + // On non-IOS platforms R9 is callee-saved. + Defs = [LR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotDarwin]> { + Requires<[IsThumb, IsNotIOS]> { bits<22> func; let Inst{26} = func{21}; let Inst{25-16} = func{20-11}; @@ -426,7 +426,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsNotDarwin]> { + Requires<[IsThumb, HasV5T, IsNotIOS]> { bits<21> func; let Inst{25-16} = func{20-11}; let Inst{13} = 1; @@ -439,7 +439,7 @@ let isCall = 1, def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsNotDarwin]>, + Requires<[IsThumb, HasV5T, IsNotIOS]>, T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; @@ -450,38 +450,37 @@ let isCall = 1, def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>; + Requires<[IsThumb, IsThumb1Only, IsNotIOS]>; } let isCall = 1, - // On Darwin R9 is call-clobbered. + // On IOS R9 is call-clobbered. // R7 is marked as a use to prevent frame-pointer assignments from being // moved above / below calls. 
- Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [R7, SP] in { + Defs = [LR], Uses = [R7, SP] in { // Also used for Thumb2 def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), 4, IIC_Br, [(ARMtcall tglobaladdr:$func)], (tBL pred:$p, t_bltarget:$func)>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb, IsIOS]>; // ARMv5T and above, also used for Thumb2 def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), 4, IIC_Br, [(ARMcall tglobaladdr:$func)], (tBLXi pred:$p, t_blxtarget:$func)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + Requires<[IsThumb, HasV5T, IsIOS]>; // Also used for Thumb2 def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops), 2, IIC_Br, [(ARMtcall GPR:$func)], (tBLXr pred:$p, GPR:$func)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + Requires<[IsThumb, HasV5T, IsIOS]>; // ARMv4T def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsDarwin]>; + Requires<[IsThumb, IsThumb1Only, IsIOS]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { @@ -523,28 +522,26 @@ let isBranch = 1, isTerminator = 1 in // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin versions. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { - // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls - // on Darwin), so it's in ARMInstrThumb2.td. + // IOS versions. + let Uses = [SP] in { + // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls + // on IOS), so it's in ARMInstrThumb2.td. def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb, IsIOS]>; } - // Non-Darwin versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { + // Non-IOS versions (the difference is R9). + let Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, - Requires<[IsThumb, IsNotDarwin]>; + Requires<[IsThumb, IsNotIOS]>; def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsNotDarwin]>; + Requires<[IsThumb, IsNotIOS]>; } } @@ -652,7 +649,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, } // Load tconstpool -// FIXME: Use ldr.n to work around a Darwin assembler bug. +// FIXME: Use ldr.n to work around a darwin assembler bug. let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, "ldr", ".n\t$Rt, $addr", @@ -666,10 +663,9 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, } // FIXME: Remove this entry when the above ldr.n workaround is fixed. -// For disassembly use only. -def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, - "ldr", "\t$Rt, $addr", - [/* disassembly only */]>, +// For assembly/disassembly use only. 
+def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", []>, T1Encoding<{0,1,0,0,1,?}> { // A6.2 & A8.6.59 bits<3> Rt; @@ -1262,14 +1258,18 @@ def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, 0, NoItinerary, "","", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; -// FIXME: Non-Darwin version(s) +// FIXME: Non-IOS version(s) let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1, Defs = [ R7, LR, SP ] in def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone, Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb, IsIOS]>; + +let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ], + isBarrier = 1 in +def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -1307,20 +1307,20 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Direct calls def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, - Requires<[IsThumb, IsNotDarwin]>; + Requires<[IsThumb, IsNotIOS]>; def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb, IsIOS]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsNotDarwin]>; + Requires<[IsThumb, HasV5T, IsNotIOS]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + Requires<[IsThumb, HasV5T, IsIOS]>; // Indirect calls to ARM routines def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsNotDarwin]>; + Requires<[IsThumb, HasV5T, IsNotIOS]>; def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + Requires<[IsThumb, HasV5T, IsIOS]>; // zextload i1 -> zextload i8 def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 981592c..e8984e1 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1,4 +1,4 @@ -//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===// +//===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -136,6 +136,12 @@ def t2ldrlabel : Operand<i32> { let PrintMethod = "printT2LdrLabelOperand"; } +def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";} +def t2ldr_pcrel_imm12 : Operand<i32> { + let ParserMatchClass = t2ldr_pcrel_imm12_asmoperand; + // used for assembler pseudo instruction and maps to t2ldrlabel, so + // doesn't need encoder or print methods of its own. +} // ADR instruction labels. def t2adrlabel : Operand<i32> { @@ -552,6 +558,11 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, string baseOpc, bit Commutable = 0> : T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> { + // Assembler aliases w/ the ".w" suffix. + def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, + t2_so_imm:$imm, pred:$p, + cc_out:$s)>; // Assembler aliases w/o the ".w" suffix. 
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, @@ -563,6 +574,10 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, cc_out:$s)>; // and with the optional destination operand, too. + def : t2InstAlias<!strconcat(opc, "${s}${p}.ri", " $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + t2_so_imm:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, @@ -940,7 +955,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let DecoderMethod = "DecodeT2LoadShift"; } - // FIXME: Is the pci variant actually needed? + // pci variant is very similar to i12, but supports negative offsets + // from the PC. def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { @@ -2936,6 +2952,44 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, RegConstraint<"$false = $Rd">; + +multiclass T2I_bincc_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { + // shifted imm + def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), + iii, opc, ".w\t$Rd, $Rn, $imm", []>, + RegConstraint<"$Rn = $Rd"> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{15} = 0; + } + // register + def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + iir, opc, ".w\t$Rd, $Rn, $Rm", []>, + RegConstraint<"$Rn = $Rd"> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } + // shifted register + def rs : T2sTwoRegShiftedReg<(outs rGPR:$Rd), + (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + iis, opc, ".w\t$Rd, $Rn, $ShiftedRm", []>, + RegConstraint<"$Rn = $Rd"> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + } +} // T2I_bincc_irs + +defm t2ANDCC : T2I_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2ORRCC : T2I_bincc_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2EORCC : T2I_bincc_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; + } // isCodeGenOnly = 1 } // neverHasSideEffects @@ -3058,9 +3112,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, let Inst{11-8} = Rd; let Inst{7-0} = addr{7-0}; } -} - -let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in +let hasExtraSrcRegAllocReq = 1 in def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, @@ -3069,6 +3121,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), bits<4> Rt2; let Inst{11-8} = Rt2; } +} def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, Requires<[IsThumb2, HasV7]> { @@ -3096,7 +3149,7 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, // $val is a scratch register for our use. 
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), @@ -3212,18 +3265,49 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let DecoderMethod = "DecodeThumb2BCCInstruction"; } -// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so +// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so // it goes here. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin version. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in + // IOS version. + let Uses = [SP] in def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], (t2B uncondbrtarget:$dst, pred:$p)>, - Requires<[IsThumb2, IsDarwin]>; -} + Requires<[IsThumb2, IsIOS]>; +} + +let isCall = 1, + // On non-IOS platforms R9 is callee-saved. + Defs = [LR], Uses = [SP] in { + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. + def t2BMOVPCB_CALL : tPseudoInst<(outs), + (ins t_bltarget:$func, variable_ops), + 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsThumb, IsNotIOS]>; +} + +let isCall = 1, + // On IOS R9 is call-clobbered. + // R7 is marked as a use to prevent frame-pointer assignments from being + // moved above / below calls. + Defs = [LR], Uses = [R7, SP] in { + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. + def t2BMOVPCBr9_CALL : tPseudoInst<(outs), + (ins t_bltarget:$func, variable_ops), + 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsThumb, IsIOS]>; +} + +// Direct calls +def : T2Pat<(ARMcall_nolink texternalsym:$func), + (t2BMOVPCB_CALL texternalsym:$func)>, + Requires<[IsThumb, IsNotIOS]>; +def : T2Pat<(ARMcall_nolink texternalsym:$func), + (t2BMOVPCBr9_CALL texternalsym:$func)>, + Requires<[IsThumb, IsIOS]>; // IT block let Defs = [ITSTATE] in @@ -4141,6 +4225,37 @@ def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift", def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift", (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift", + (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift", + (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; + // ADR w/o the .w suffix def : t2InstAlias<"adr${p} $Rd, $addr", (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; + +// LDR(literal) w/ alternate [pc, #imm] syntax. +def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRSBpcrel : t2AsmPseudo<"ldrsb${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + // Version w/ the .w suffix. 
+def : t2InstAlias<"ldr${p}.w $Rt, $addr", + (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrb${p}.w $Rt, $addr", + (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrh${p}.w $Rt, $addr", + (t2LDRHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsb${p}.w $Rt, $addr", + (t2LDRSBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsh${p}.w $Rt, $addr", + (t2LDRSHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + +def : t2InstAlias<"add${p} $Rd, pc, $imm", + (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 5d43556..aa10af7 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1,4 +1,4 @@ -//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===// +//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -61,6 +61,22 @@ def vfp_f64imm : Operand<f64>, let ParserMatchClass = FPImmOperand; } +// The VCVT to/from fixed-point instructions encode the 'fbits' operand +// (the number of fixed bits) differently than it appears in the assembly +// source. It's encoded as "Size - fbits" where Size is the size of the +// fixed-point representation (32 or 16) and fbits is the value appearing +// in the assembly source, an integer in [0,16] or (0,32], depending on size. +def fbits32_asm_operand : AsmOperandClass { let Name = "FBits32"; } +def fbits32 : Operand<i32> { + let PrintMethod = "printFBits32"; + let ParserMatchClass = fbits32_asm_operand; +} + +def fbits16_asm_operand : AsmOperandClass { let Name = "FBits16"; } +def fbits16 : Operand<i32> { + let PrintMethod = "printFBits16"; + let ParserMatchClass = fbits16_asm_operand; +} //===----------------------------------------------------------------------===// // Load / store Instructions. @@ -790,127 +806,109 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, // S32 (U=0, sx=1) -> SL // U32 (U=1, sx=1) -> UL -// FIXME: Marking these as codegen only seems wrong. They are real -// instructions(?) -let Constraints = "$a = $dst", isCodeGenOnly = 1 in { +let Constraints = "$a = $dst" in { // FP to Fixed-Point: def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>; def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>; def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>; def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>; // Fixed-Point to FP: def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>; def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>; def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>; def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>; -} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in' +} // End of 'let Constraints = "$a = $dst" in' //===----------------------------------------------------------------------===// // FP Multiply-Accumulate Operations. @@ -922,7 +920,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -930,7 +928,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -938,10 +936,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>; def VMLSD : ADbI<0b11100, 0b00, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -949,7 +947,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -957,7 +955,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -965,10 +963,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; def VNMLAD : ADbI<0b11100, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -976,7 +974,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -984,7 +982,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -992,10 +990,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1003,14 +1001,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1018,11 +1016,116 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + +//===----------------------------------------------------------------------===// +// Fused FP Multiply-Accumulate Operations. +// +def VFMAD : ADbI<0b11101, 0b10, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,FPContractions]>; +def VFMAS : ASbIn<0b11101, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,FPContractions]>; +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + +def VFMSD : ADbI<0b11101, 0b10, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,FPContractions]>; + +def VFMSS : ASbIn<0b11101, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. 
+} + +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,FPContractions]>; +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + +def VFNMAD : ADbI<0b11101, 0b01, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,FPContractions]>; + +def VFNMAS : ASbI<0b11101, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), + (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,FPContractions]>; +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), + (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + +def VFNMSD : ADbI<0b11101, 0b01, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,FPContractions]>; + +def VFNMSS : ASbI<0b11101, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), + (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,FPContractions]>; +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), + (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; //===----------------------------------------------------------------------===// // FP Conditional moves. 
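An illustrative aside on the fused multiply-accumulate definitions above (this example is not part of the patch): the VFMA/VFMS/VFNMA/VFNMS patterns are only selected when the FPContractions predicate holds because a fused multiply-add rounds once, whereas the VMUL-then-VADD sequence matched by the VMLA patterns rounds twice, so contracting can change the numeric result. A minimal host-side C++ sketch of that difference, using std::fma as a stand-in for VFMA:

// Illustrative only; assumes the host compiler does not itself contract a*b + c.
#include <cmath>
#include <cstdio>

int main() {
  double eps = std::ldexp(1.0, -27);     // 2^-27
  double a = 1.0 + eps, b = 1.0 + eps, c = -1.0;
  double fused   = std::fma(a, b, c);    // one rounding, like VFMA
  double unfused = a * b + c;            // product rounded, then sum rounded, like VMUL + VADD
  std::printf("fused   = %.17g\nunfused = %.17g\n", fused, unfused);
  // fused keeps the 2^-54 term of the exact product; unfused loses it when
  // a*b is rounded to double, so the two printed values differ.
  return 0;
}

That discrepancy is why the new fused patterns are gated behind a predicate instead of unconditionally replacing the VMLA patterns.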
@@ -1175,6 +1278,7 @@ def : VFP2MnemonicAlias<"fmrdd", "vmov">; def : VFP2MnemonicAlias<"fmrds", "vmov">; def : VFP2MnemonicAlias<"fmrrd", "vmov">; def : VFP2MnemonicAlias<"fmdrr", "vmov">; +def : VFP2MnemonicAlias<"fmuls", "vmul.f32">; def : VFP2MnemonicAlias<"fmuld", "vmul.f64">; def : VFP2MnemonicAlias<"fnegs", "vneg.f32">; def : VFP2MnemonicAlias<"fnegd", "vneg.f64">; @@ -1194,6 +1298,12 @@ def : VFP2MnemonicAlias<"fsts", "vstr">; def : VFP2MnemonicAlias<"fstd", "vstr">; def : VFP2MnemonicAlias<"fmacd", "vmla.f64">; def : VFP2MnemonicAlias<"fmacs", "vmla.f32">; +def : VFP2MnemonicAlias<"fcpys", "vmov.f32">; +def : VFP2MnemonicAlias<"fcpyd", "vmov.f64">; +def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">; +def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">; +def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">; +def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">; def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", @@ -1235,10 +1345,18 @@ def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm", def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm", (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; -// VMOV can accept optional .f32/.f64 suffix. -def : VFP2InstAlias<"vmov${p}.f32 $Rt, $Sn", +// VMOV can accept optional 32-bit or less data type suffix suffix. +def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn", + (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn", + (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn", (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>; -def : VFP2InstAlias<"vmov${p}.f32 $Sn, $Rt", +def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt", + (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt", + (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt", (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>; def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn", diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index 2f97928..7928184 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -1,4 +1,4 @@ -//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===// +//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 6712fb6..0f6dc04 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1,4 +1,4 @@ -//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=// +//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. 
pass ------------===// // // The LLVM Compiler Infrastructure // @@ -144,7 +144,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::LDMDB; case ARM_AM::ib: return ARM::LDMIB; } - break; case ARM::STRi12: ++NumSTMGened; switch (Mode) { @@ -154,7 +153,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::STMDB; case ARM_AM::ib: return ARM::STMIB; } - break; case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; @@ -163,7 +161,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::t2LDMIA; case ARM_AM::db: return ARM::t2LDMDB; } - break; case ARM::t2STRi8: case ARM::t2STRi12: ++NumSTMGened; @@ -172,7 +169,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::t2STMIA; case ARM_AM::db: return ARM::t2STMDB; } - break; case ARM::VLDRS: ++NumVLDMGened; switch (Mode) { @@ -180,7 +176,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VLDMSIA; case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. } - break; case ARM::VSTRS: ++NumVSTMGened; switch (Mode) { @@ -188,7 +183,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VSTMSIA; case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. } - break; case ARM::VLDRD: ++NumVLDMGened; switch (Mode) { @@ -196,7 +190,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VLDMDIA; case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. } - break; case ARM::VSTRD: ++NumVSTMGened; switch (Mode) { @@ -204,10 +197,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VSTMDIA; case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. } - break; } - - return 0; } namespace llvm { @@ -262,8 +252,6 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::STMIB_UPD: return ARM_AM::ib; } - - return ARM_AM::bad_am_submode; } } // end namespace ARM_AM @@ -509,50 +497,84 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, return; } -static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg){ +static bool definesCPSR(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) + // If the instruction has live CPSR def, then it's not safe to fold it + // into load / store. + return true; + } + + return false; +} + +static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, unsigned Limit, + ARMCC::CondCodes Pred, unsigned PredReg) { unsigned MyPredReg = 0; if (!MI) return false; - if (MI->getOpcode() != ARM::t2SUBri && - MI->getOpcode() != ARM::tSUBspi && - MI->getOpcode() != ARM::SUBri) - return false; + + bool CheckCPSRDef = false; + switch (MI->getOpcode()) { + default: return false; + case ARM::t2SUBri: + case ARM::SUBri: + CheckCPSRDef = true; + // fallthrough + case ARM::tSUBspi: + break; + } // Make sure the offset fits in 8 bits. if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 
4 : 1; // FIXME - return (MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg); + if (!(MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + (MI->getOperand(2).getImm()*Scale) == Bytes && + llvm::getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg)) + return false; + + return CheckCPSRDef ? !definesCPSR(MI) : true; } -static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg){ +static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, unsigned Limit, + ARMCC::CondCodes Pred, unsigned PredReg) { unsigned MyPredReg = 0; if (!MI) return false; - if (MI->getOpcode() != ARM::t2ADDri && - MI->getOpcode() != ARM::tADDspi && - MI->getOpcode() != ARM::ADDri) - return false; + + bool CheckCPSRDef = false; + switch (MI->getOpcode()) { + default: return false; + case ARM::t2ADDri: + case ARM::ADDri: + CheckCPSRDef = true; + // fallthrough + case ARM::tADDspi: + break; + } if (Bytes == 0 || (Limit && Bytes >= Limit)) // Make sure the offset fits in 8 bits. return false; unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME - return (MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg); + if (!(MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + (MI->getOperand(2).getImm()*Scale) == Bytes && + llvm::getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg)) + return false; + + return CheckCPSRDef ? 
!definesCPSR(MI) : true; } static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { @@ -606,7 +628,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::da: return ARM::LDMDA_UPD; case ARM_AM::db: return ARM::LDMDB_UPD; } - break; case ARM::STMIA: case ARM::STMDA: case ARM::STMDB: @@ -618,7 +639,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::da: return ARM::STMDA_UPD; case ARM_AM::db: return ARM::STMDB_UPD; } - break; case ARM::t2LDMIA: case ARM::t2LDMDB: switch (Mode) { @@ -626,7 +646,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::ia: return ARM::t2LDMIA_UPD; case ARM_AM::db: return ARM::t2LDMDB_UPD; } - break; case ARM::t2STMIA: case ARM::t2STMDB: switch (Mode) { @@ -634,38 +653,31 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::ia: return ARM::t2STMIA_UPD; case ARM_AM::db: return ARM::t2STMDB_UPD; } - break; case ARM::VLDMSIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMSIA_UPD; case ARM_AM::db: return ARM::VLDMSDB_UPD; } - break; case ARM::VLDMDIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMDIA_UPD; case ARM_AM::db: return ARM::VLDMDDB_UPD; } - break; case ARM::VSTMSIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMSIA_UPD; case ARM_AM::db: return ARM::VSTMSDB_UPD; } - break; case ARM::VSTMDIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMDIA_UPD; case ARM_AM::db: return ARM::VSTMDDB_UPD; } - break; } - - return 0; } /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base @@ -786,7 +798,6 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, return ARM::t2STR_PRE; default: llvm_unreachable("Unhandled opcode!"); } - return 0; } static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, @@ -812,7 +823,6 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, return ARM::t2STR_POST; default: llvm_unreachable("Unhandled opcode!"); } - return 0; } /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base @@ -1639,8 +1649,9 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, LastOp = Op; } - unsigned Opcode = Op->getOpcode(); - if (LastOpcode && Opcode != LastOpcode) + unsigned LSMOpcode + = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); + if (LastOpcode && LSMOpcode != LastOpcode) break; int Offset = getMemoryOpOffset(Op); @@ -1651,7 +1662,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } LastOffset = Offset; LastBytes = Bytes; - LastOpcode = Opcode; + LastOpcode = LSMOpcode; if (++NumMove == 8) // FIXME: Tune this limit. 
break; } diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index daa126d..e2ac9a4 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -31,8 +31,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); switch (MO.getTargetFlags()) { - default: - assert(0 && "Unknown target flag on symbol operand"); + default: llvm_unreachable("Unknown target flag on symbol operand"); case 0: break; case ARMII::MO_LO16: @@ -67,9 +66,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { switch (MO.getType()) { - default: - assert(0 && "unknown operand type"); - return false; + default: llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all non-CPSR implicit register operands. if (MO.isImplicit() && MO.getReg() != ARM::CPSR) @@ -107,6 +104,9 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); break; } + case MachineOperand::MO_RegisterMask: + // Ignore call clobbers. + return false; } return true; } diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp new file mode 100644 index 0000000..af445e2 --- /dev/null +++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -0,0 +1,14 @@ +//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARMMachineFunctionInfo.h" + +using namespace llvm; + +void ARMFunctionInfo::anchor() { } diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 138f0c2..f1c8fc8 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// +//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -25,6 +25,7 @@ namespace llvm { /// ARMFunctionInfo - This class is derived from MachineFunctionInfo and /// contains private ARM-specific information for each MachineFunction. class ARMFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); /// isThumb - True if this function is compiled under Thumb mode. /// Used to initialized Align, so must precede it. @@ -63,6 +64,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// GPR callee-saved (2) : r8, r10, r11 /// -------------------------------------------- /// DPR callee-saved : d8 - d15 + /// + /// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3. + /// Some may be spilled after the stack has been realigned. unsigned GPRCS1Offset; unsigned GPRCS2Offset; unsigned DPRCSOffset; @@ -79,6 +83,15 @@ class ARMFunctionInfo : public MachineFunctionInfo { BitVector GPRCS2Frames; BitVector DPRCSFrames; + /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in + /// the aligned portion of the stack frame. This is always a contiguous + /// sequence of D-registers starting from d8. 
+ /// + /// We do not keep track of the frame indices used for these registers - they + /// behave like any other frame index in the aligned stack frame. These + /// registers also aren't included in DPRCSSize above. + unsigned NumAlignedDPRCS2Regs; + /// JumpTableUId - Unique id for jumptables. /// unsigned JumpTableUId; @@ -104,6 +117,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), + NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {} @@ -137,6 +151,9 @@ public: unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; } void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; } + unsigned getNumAlignedDPRCS2Regs() const { return NumAlignedDPRCS2Regs; } + void setNumAlignedDPRCS2Regs(unsigned n) { NumAlignedDPRCS2Regs = n; } + unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; } unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; } unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; } diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h index 18e1620..efa22fb 100644 --- a/lib/Target/ARM/ARMPerfectShuffle.h +++ b/lib/Target/ARM/ARMPerfectShuffle.h @@ -1,4 +1,4 @@ -//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===// +//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 1cba1ba..1f83762 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===// +//===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -16,6 +16,8 @@ #include "ARMRegisterInfo.h" using namespace llvm; +void ARMRegisterInfo::anchor() { } + ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMBaseRegisterInfo(tii, sti) { diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 8edfb9a..65ed95d 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===// +//===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,6 +24,7 @@ namespace llvm { class Type; struct ARMRegisterInfo : public ARMBaseRegisterInfo { + virtual void anchor(); public: ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); }; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 036822d..b16a12c 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===// +//===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -16,6 +16,8 @@ class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> { field bits<4> Num; let Namespace = "ARM"; let SubRegs = subregs; + // All bits of ARM registers with sub-registers are covered by sub-registers. 
+ let CoveredBySubRegs = 1; } class ARMFReg<bits<6> num, string n> : Register<n> { @@ -25,28 +27,30 @@ class ARMFReg<bits<6> num, string n> : Register<n> { // Subregister indices. let Namespace = "ARM" in { +def qqsub_0 : SubRegIndex; +def qqsub_1 : SubRegIndex; + // Note: Code depends on these having consecutive numbers. -def ssub_0 : SubRegIndex; -def ssub_1 : SubRegIndex; -def ssub_2 : SubRegIndex; // In a Q reg. -def ssub_3 : SubRegIndex; +def qsub_0 : SubRegIndex; +def qsub_1 : SubRegIndex; +def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>; +def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>; def dsub_0 : SubRegIndex; def dsub_1 : SubRegIndex; -def dsub_2 : SubRegIndex; -def dsub_3 : SubRegIndex; -def dsub_4 : SubRegIndex; -def dsub_5 : SubRegIndex; -def dsub_6 : SubRegIndex; -def dsub_7 : SubRegIndex; +def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>; +def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>; +def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>; +def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>; +def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>; +def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>; -def qsub_0 : SubRegIndex; -def qsub_1 : SubRegIndex; -def qsub_2 : SubRegIndex; -def qsub_3 : SubRegIndex; - -def qqsub_0 : SubRegIndex; -def qqsub_1 : SubRegIndex; +def ssub_0 : SubRegIndex; +def ssub_1 : SubRegIndex; +def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>; +def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>; +// Let TableGen synthesize the remaining 12 ssub_* indices. +// We don't need to name them. } // Integer registers @@ -127,9 +131,7 @@ def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>; def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>; // Advanced SIMD (NEON) defines 16 quad-word aliases -let SubRegIndices = [dsub_0, dsub_1], - CompositeIndices = [(ssub_2 dsub_1, ssub_0), - (ssub_3 dsub_1, ssub_1)] in { +let SubRegIndices = [dsub_0, dsub_1] in { def Q0 : ARMReg< 0, "q0", [D0, D1]>; def Q1 : ARMReg< 1, "q1", [D2, D3]>; def Q2 : ARMReg< 2, "q2", [D4, D5]>; @@ -150,36 +152,6 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>; def Q15 : ARMReg<15, "q15", [D30, D31]>; } -// Pseudo 256-bit registers to represent pairs of Q registers. These should -// never be present in the emitted code. -// These are used for NEON load / store instructions, e.g., vld4, vst3. -// NOTE: It's possible to define more QQ registers since technically the -// starting D register number doesn't have to be multiple of 4, e.g., -// D1, D2, D3, D4 would be a legal quad, but that would make the subregister -// stuff very messy. -let SubRegIndices = [qsub_0, qsub_1], - CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in { -def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>; -def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>; -def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>; -def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>; -def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>; -def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>; -def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>; -def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>; -} - -// Pseudo 512-bit registers to represent four consecutive Q registers. -let SubRegIndices = [qqsub_0, qqsub_1], - CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), - (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), - (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in { -def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; -def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; -def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>; -def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; -} - // Current Program Status Register. 
def CPSR : ARMReg<0, "cpsr">; def APSR : ARMReg<1, "apsr">; @@ -261,6 +233,12 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> { }]; } +// Condition code registers. +def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { + let CopyCost = -1; // Don't allow copying of status registers. + let isAllocatable = 0; +} + // Scalar single precision floating point register class.. def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>; @@ -316,37 +294,98 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], (DPR_8 dsub_0, dsub_1)]; } +// Pseudo-registers representing odd-even pairs of D registers. The even-odd +// pairs are already represented by the Q registers. +// These are needed by NEON instructions requiring two consecutive D registers. +// There is no D31_D0 register as that is always an UNPREDICTABLE encoding. +def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1], + [(decimate (shl DPR, 1), 2), + (decimate (shl DPR, 2), 2)]>; + +// Register class representing a pair of consecutive D registers. +// Use the Q registers for the even-odd pairs. +def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)> { + // Allocate starting at non-VFP2 registers D16-D31 first. + let AltOrders = [(rotl DPair, 16)]; + let AltOrderSelect = [{ return 1; }]; +} + +// Pseudo-registers representing 3 consecutive D registers. +def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], + [(shl DPR, 0), + (shl DPR, 1), + (shl DPR, 2)]>; + +// 3 consecutive D registers. +def DTriple : RegisterClass<"ARM", [untyped], 64, (add Tuples3D)> { + let Size = 192; // 3 x 64 bits, we have no predefined type of that size. +} + +// Pseudo 256-bit registers to represent pairs of Q registers. These should +// never be present in the emitted code. +// These are used for NEON load / store instructions, e.g., vld4, vst3. +def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>; + // Pseudo 256-bit vector register class to model pairs of Q registers // (4 consecutive D registers). -def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> { +def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), (QPR qsub_0, qsub_1)]; // Allocate non-VFP2 aliases first. - let AltOrders = [(rotl QQPR, 4)]; + let AltOrders = [(rotl QQPR, 8)]; let AltOrderSelect = [{ return 1; }]; } -// Subset of QQPR that have 32-bit SPR subregs. -def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> { - let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), - (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3), - (QPR_VFP2 qsub_0, qsub_1)]; +// Tuples of 4 D regs that isn't also a pair of Q regs. +def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3], + [(decimate (shl DPR, 1), 2), + (decimate (shl DPR, 2), 2), + (decimate (shl DPR, 3), 2), + (decimate (shl DPR, 4), 2)]>; -} +// 4 consecutive D registers. +def DQuad : RegisterClass<"ARM", [v4i64], 256, + (interleave Tuples2Q, TuplesOE4D)>; + +// Pseudo 512-bit registers to represent four consecutive Q registers. +def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1], + [(shl QQPR, 0), (shl QQPR, 2)]>; // Pseudo 512-bit vector register class to model 4 consecutive Q registers // (8 consecutive D registers). 
-def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> { +def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, dsub_4, dsub_5, dsub_6, dsub_7), (QPR qsub_0, qsub_1, qsub_2, qsub_3)]; // Allocate non-VFP2 aliases first. - let AltOrders = [(rotl QQQQPR, 2)]; + let AltOrders = [(rotl QQQQPR, 8)]; let AltOrderSelect = [{ return 1; }]; } -// Condition code registers. -def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { - let CopyCost = -1; // Don't allow copying of status registers. - let isAllocatable = 0; + +// Pseudo-registers representing 2-spaced consecutive D registers. +def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2], + [(shl DPR, 0), + (shl DPR, 2)]>; + +// Spaced pairs of D registers. +def DPairSpc : RegisterClass<"ARM", [v2i64], 64, (add Tuples2DSpc)>; + +def Tuples3DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4], + [(shl DPR, 0), + (shl DPR, 2), + (shl DPR, 4)]>; + +// Spaced triples of D registers. +def DTripleSpc : RegisterClass<"ARM", [untyped], 64, (add Tuples3DSpc)> { + let Size = 192; // 3 x 64 bits, we have no predefined type of that size. } + +def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6], + [(shl DPR, 0), + (shl DPR, 2), + (shl DPR, 4), + (shl DPR, 6)]>; + +// Spaced quads of D registers. +def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h index 291f3cc..9c32b15 100644 --- a/lib/Target/ARM/ARMRelocations.h +++ b/lib/Target/ARM/ARMRelocations.h @@ -1,4 +1,4 @@ -//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===// +//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 958c5c6..45486fd 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -1,10 +1,10 @@ -//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===// -// +//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
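The register-tuple rework above drops the explicit QQ/QQQQ pseudo-registers in favor of synthesized tuples and introduces DPair/DPairSpc for consecutive and 2-spaced D-register pairs. A sketch of how a pair is resolved to the single register that covers it (assumes an MCRegisterInfo *MRI is at hand, as cached in the assembler changes further down, where the same call appears):

// Map {D0, D1} onto the one register covering it in the new DPair class.
// For an even-numbered start this is just the Q register (Q0 here); an odd
// start such as D1 resolves to one of the synthesized odd-even tuple regs.
unsigned PairReg =
    MRI->getMatchingSuperReg(ARM::D0, ARM::dsub_0,
                             &ARMMCRegisterClasses[ARM::DPairRegClassID]);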
-// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -118,6 +118,8 @@ def IIC_fpMUL32 : InstrItinClass; def IIC_fpMUL64 : InstrItinClass; def IIC_fpMAC32 : InstrItinClass; def IIC_fpMAC64 : InstrItinClass; +def IIC_fpFMAC32 : InstrItinClass; +def IIC_fpFMAC64 : InstrItinClass; def IIC_fpDIV32 : InstrItinClass; def IIC_fpDIV64 : InstrItinClass; def IIC_fpSQRT32 : InstrItinClass; @@ -208,6 +210,8 @@ def IIC_VPERMQ : InstrItinClass; def IIC_VPERMQ3 : InstrItinClass; def IIC_VMACD : InstrItinClass; def IIC_VMACQ : InstrItinClass; +def IIC_VFMACD : InstrItinClass; +def IIC_VFMACQ : InstrItinClass; def IIC_VRECSD : InstrItinClass; def IIC_VRECSQ : InstrItinClass; def IIC_VCNTiD : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index c1880a7..4d959f5 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -1,10 +1,10 @@ -//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===// -// +//===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the ARM v6 processors. diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 36d58de..e2530d0 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -145,8 +145,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { - // Use default for non AAPCS subtargets - if (!Subtarget->isAAPCS_ABI()) + // Use default for non AAPCS (or Darwin) subtargets + if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) return SDValue(); const ARMTargetLowering &TLI = @@ -189,6 +189,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, 0, // number of fixed arguments TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv false, // is tail call + false, // does not return false, // is return val used DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), // callee diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 2cb5ab9..1bd6f1c 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -1,4 +1,4 @@ -//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===// +//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -47,7 +47,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasV7Ops(false) , HasVFPv2(false) , HasVFPv3(false) + , HasVFPv4(false) , HasNEON(false) + , HasNEON2(false) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) , HasVMLxForwarding(false) @@ -103,18 +105,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, computeIssueWidth(); if (TT.find("eabi") != std::string::npos) + // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g. + // Darwin-EABI conforms to AACPS but not the rest of EABI. 
TargetABI = ARM_ABI_AAPCS; if (isAAPCS_ABI()) stackAlignment = 8; - if (!isTargetDarwin()) + if (!isTargetIOS()) UseMovt = hasV6T2Ops(); else { IsR9Reserved = ReserveR9 | !HasV6Ops; UseMovt = DarwinUseMOVT && hasV6T2Ops(); - const Triple &T = getTargetTriple(); - SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0); + SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); } if (!isThumb() || hasThumb2()) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index a35f450..3d9c03d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -1,4 +1,4 @@ -//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====// +//===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -45,11 +45,13 @@ protected: bool HasV6T2Ops; bool HasV7Ops; - /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are - /// supported. + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what + /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; + bool HasVFPv4; bool HasNEON; + bool HasNEON2; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to @@ -123,6 +125,10 @@ protected: /// CPSR setting instruction. bool AvoidCPSRPartialUpdate; + /// HasRAS - Some processors perform return stack prediction. CodeGen should + /// avoid issue "normal" call instructions to callees which do not return. + bool HasRAS; + /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only). bool HasMPExtension; @@ -197,7 +203,9 @@ protected: bool hasVFP2() const { return HasVFPv2; } bool hasVFP3() const { return HasVFPv3; } + bool hasVFP4() const { return HasVFPv4; } bool hasNEON() const { return HasNEON; } + bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4); } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } @@ -210,6 +218,7 @@ protected: bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } @@ -218,6 +227,7 @@ protected: const Triple &getTargetTriple() const { return TargetTriple; } + bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NativeClient; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 61b75cb..44229ad 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -34,6 +34,7 @@ extern "C" void LLVMInitializeARMTarget() { RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget); } + /// TargetMachine ctor - Create an ARM architecture model. 
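The new subtarget predicates above (hasVFP4, hasNEON2, isTargetIOS) give later code something to key on. A hypothetical helper, meant only to illustrate the predicates rather than actual backend policy:

// VFPv4 brings the scalar fused multiply-accumulate forms; hasNEON2() is
// defined above to also fire for NEON + VFPv4, covering the vector forms.
static bool mayUseFusedMAC(const ARMSubtarget &ST) {
  return ST.hasVFP4() || ST.hasNEON2();
}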
/// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, @@ -50,6 +51,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, this->Options.FloatABIType = FloatABI::Soft; } +void ARMTargetMachine::anchor() { } + ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -74,6 +77,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, "support ARM mode execution!"); } +void ThumbTargetMachine::anchor() { } + ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -102,33 +107,60 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } -bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM) { - if (getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) - PM.add(createGlobalMergePass(getTargetLowering())); +namespace { +/// ARM Code Generator Pass Configuration Options. +class ARMPassConfig : public TargetPassConfig { +public: + ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + ARMBaseTargetMachine &getARMTargetMachine() const { + return getTM<ARMBaseTargetMachine>(); + } + + const ARMSubtarget &getARMSubtarget() const { + return *getARMTargetMachine().getSubtargetImpl(); + } + + virtual bool addPreISel(); + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPreSched2(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { + return new ARMPassConfig(this, PM); +} + +bool ARMPassConfig::addPreISel() { + if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) + PM.add(createGlobalMergePass(TM->getTargetLowering())); return false; } -bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM) { - PM.add(createARMISelDag(*this, getOptLevel())); +bool ARMPassConfig::addInstSelector() { + PM.add(createARMISelDag(getARMTargetMachine(), getOptLevel())); return false; } -bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM) { +bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (getOptLevel() != CodeGenOpt::None && !Subtarget.isThumb1Only()) + if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); - if (getOptLevel() != CodeGenOpt::None && Subtarget.isCortexA9()) + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) PM.add(createMLxExpansionPass()); return true; } -bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) { +bool ARMPassConfig::addPreSched2() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. 
if (getOptLevel() != CodeGenOpt::None) { - if (!Subtarget.isThumb1Only()) + if (!getARMSubtarget().isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass()); - if (Subtarget.hasNEON()) + if (getARMSubtarget().hasNEON()) PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } @@ -137,22 +169,22 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) { PM.add(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { - if (!Subtarget.isThumb1Only()) - PM.add(createIfConverterPass()); + if (!getARMSubtarget().isThumb1Only()) + addPass(IfConverterID); } - if (Subtarget.isThumb2()) + if (getARMSubtarget().isThumb2()) PM.add(createThumb2ITBlockPass()); return true; } -bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) { - if (Subtarget.isThumb2()) { - if (!Subtarget.prefers32BitThumb()) +bool ARMPassConfig::addPreEmitPass() { + if (getARMSubtarget().isThumb2()) { + if (!getARMSubtarget().prefers32BitThumb()) PM.add(createThumb2SizeReductionPass()); // Constant island pass work on unbundled instructions. - PM.add(createUnpackMachineBundlesPass()); + addPass(UnpackMachineBundlesID); } PM.add(createARMConstantIslandPass()); @@ -160,8 +192,7 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) { return true; } -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE) { +bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { // Machine code emitter pass for ARM. PM.add(createARMJITCodeEmitterPass(*this, JCE)); return false; diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index cd77822..abcdb24 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -52,17 +52,15 @@ public: } // Pass Pipeline Configuration - virtual bool addPreISel(PassManagerBase &PM); - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPreRegAlloc(PassManagerBase &PM); - virtual bool addPreSched2(PassManagerBase &PM); - virtual bool addPreEmitPass(PassManagerBase &PM); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE); }; /// ARMTargetMachine - ARM target machine. /// class ARMTargetMachine : public ARMBaseTargetMachine { + virtual void anchor(); ARMInstrInfo InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMELFWriterInfo ELFWriterInfo; @@ -103,6 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { /// Thumb-1 and Thumb-2. /// class ThumbTargetMachine : public ARMBaseTargetMachine { + virtual void anchor(); // Either Thumb1InstrInfo or Thumb2InstrInfo. 
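The ARMPassConfig change above moves pass-pipeline setup from virtual hooks on the TargetMachine onto a TargetPassConfig subclass returned by createPassConfig(). A skeleton of the same pattern for some other target (all Foo* names are hypothetical):

class FooPassConfig : public TargetPassConfig {
public:
  FooPassConfig(FooTargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}
  virtual bool addInstSelector() {
    // Same shape as ARMPassConfig::addInstSelector() above.
    PM.add(createFooISelDag(getTM<FooTargetMachine>(), getOptLevel()));
    return false;
  }
};

TargetPassConfig *FooTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new FooPassConfig(this, PM);
}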
OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 721a225..a5ea1c2 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; using namespace dwarf; @@ -24,8 +25,9 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); + isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI(); - if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { + if (isAAPCS_ABI) { StaticCtorSection = getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | @@ -36,7 +38,6 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); - StructorOutputOrder = Structors::PriorityOrder; LSDASection = NULL; } @@ -46,3 +47,33 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, 0, SectionKind::getMetadata()); } + +const MCSection * +ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const { + if (!isAAPCS_ABI) + return TargetLoweringObjectFileELF::getStaticCtorSection(Priority); + + if (Priority == 65535) + return StaticCtorSection; + + // Emit ctors in priority order. + std::string Name = std::string(".init_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + +const MCSection * +ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const { + if (!isAAPCS_ABI) + return TargetLoweringObjectFileELF::getStaticDtorSection(Priority); + + if (Priority == 65535) + return StaticDtorSection; + + // Emit dtors in priority order. + std::string Name = std::string(".fini_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); +} diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index c6a7261..ff21060 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -20,6 +20,7 @@ class TargetMachine; class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { protected: const MCSection *AttributesSection; + bool isAAPCS_ABI; public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), @@ -31,6 +32,9 @@ public: virtual const MCSection *getAttributesSection() const { return AttributesSection; } + + const MCSection * getStaticCtorSection(unsigned Priority) const; + const MCSection * getStaticDtorSection(unsigned Priority) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cd86065..2045482 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -44,6 +44,7 @@ enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + const MCRegisterInfo *MRI; // Map of register aliases registers via the .req directive. 
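The ARMTargetObjectFile hooks above route static constructors and destructors into priority-suffixed sections when targeting AAPCS. A small helper mirroring that naming, as a sketch only (utostr comes from llvm/ADT/StringExtras.h, which the patch now includes):

static std::string ctorSectionName(unsigned Priority) {
  if (Priority == 65535)
    return ".init_array";                                // default priority
  return std::string(".init_array.") + utostr(Priority); // e.g. ".init_array.101"
}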
StringMap<unsigned> RegisterReqs; @@ -101,6 +102,8 @@ class ARMAsmParser : public MCTargetAsmParser { bool parseDirectiveSyntax(SMLoc L); bool parseDirectiveReq(StringRef Name, SMLoc L); bool parseDirectiveUnreq(SMLoc L); + bool parseDirectiveArch(SMLoc L); + bool parseDirectiveEabiAttr(SMLoc L); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, @@ -234,6 +237,9 @@ public: : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); + // Cache the MCRegisterInfo. + MRI = &getContext().getRegisterInfo(); + // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -268,7 +274,6 @@ class ARMOperand : public MCParsedAsmOperand { k_CoprocReg, k_CoprocOption, k_Immediate, - k_FPImmediate, k_MemBarrierOpt, k_Memory, k_PostIndexRegister, @@ -347,10 +352,6 @@ class ARMOperand : public MCParsedAsmOperand { const MCExpr *Val; } Imm; - struct { - unsigned Val; // encoded 8-bit representation - } FPImm; - /// Combined record for all forms of ARM address expressions. struct { unsigned BaseRegNum; @@ -361,7 +362,7 @@ class ARMOperand : public MCParsedAsmOperand { ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg unsigned ShiftImm; // shift for OffsetReg. unsigned Alignment; // 0 = no alignment specified - // n = alignment in bytes (8, 16, or 32) + // n = alignment in bytes (2, 4, 8, 16, or 32) unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) } Memory; @@ -436,9 +437,6 @@ public: case k_Immediate: Imm = o.Imm; break; - case k_FPImmediate: - FPImm = o.FPImm; - break; case k_MemBarrierOpt: MBOpt = o.MBOpt; break; @@ -507,15 +505,10 @@ public: } const MCExpr *getImm() const { - assert(Kind == k_Immediate && "Invalid access!"); + assert(isImm() && "Invalid access!"); return Imm.Val; } - unsigned getFPImm() const { - assert(Kind == k_FPImmediate && "Invalid access!"); - return FPImm.Val; - } - unsigned getVectorIndex() const { assert(Kind == k_VectorIndex && "Invalid access!"); return VectorIndex.Val; @@ -544,202 +537,197 @@ public: bool isITMask() const { return Kind == k_ITCondMask; } bool isITCondCode() const { return Kind == k_CondCode; } bool isImm() const { return Kind == k_Immediate; } - bool isFPImm() const { return Kind == k_FPImmediate; } + bool isFPImm() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); + return Val != -1; + } + bool isFBits16() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value <= 16; + } + bool isFBits32() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 1 && Value <= 32; + } bool isImm8s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; } bool isImm0_1020s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= 0 && 
Value <= 1020; } bool isImm0_508s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= 0 && Value <= 508; } bool isImm0_255() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 256; } bool isImm0_1() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 2; } bool isImm0_3() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 4; } bool isImm0_7() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 8; } bool isImm0_15() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 16; } bool isImm0_31() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 32; } bool isImm0_63() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 64; } bool isImm8() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value == 8; } bool isImm16() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value == 16; } bool isImm32() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value == 32; } bool isShrImm8() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 8; } bool isShrImm16() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 16; } bool isShrImm32() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 32; } bool isShrImm64() const { - if (Kind != 
k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 64; } bool isImm1_7() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 8; } bool isImm1_15() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 16; } bool isImm1_31() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 32; } bool isImm1_16() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 17; } bool isImm1_32() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 33; } bool isImm0_32() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 33; } bool isImm0_65535() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 65536; } bool isImm0_65535Expr() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // If it's not a constant expression, it'll generate a fixup and be // handled later. 
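The repeated change from "Kind != k_Immediate" to isImm() across these predicates leaves them all with the same shape; k_FPImmediate is gone, so floating-point constants flow through the ordinary immediate path. The shared pattern, written out once as a hypothetical ARMOperand helper:

bool isImmInRange(int64_t Lo, int64_t Hi) const {
  if (!isImm()) return false;
  const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
  if (!CE) return false;               // non-constants are handled by fixups
  int64_t Value = CE->getValue();
  return Value >= Lo && Value <= Hi;
}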
@@ -748,88 +736,77 @@ public: return Value >= 0 && Value < 65536; } bool isImm24bit() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value <= 0xffffff; } bool isImmThumbSR() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 33; } bool isPKHLSLImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 32; } bool isPKHASRImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 32; } bool isARMSOImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(Value) != -1; } bool isARMSOImmNot() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(~Value) != -1; } bool isARMSOImmNeg() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(-Value) != -1; } bool isT2SOImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(Value) != -1; } bool isT2SOImmNot() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(~Value) != -1; } bool isT2SOImmNeg() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(-Value) != -1; } bool isSetEndImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); @@ -858,6 +835,17 @@ public: return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && (alignOK || Memory.Alignment == 0); } + bool isMemPCRelImm12() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Base register must be PC. + if (Memory.BaseRegNum != ARM::PC) + return false; + // Immediate offset in range [-4095, 4095]. 
+ if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); + } bool isAlignedMemory() const { return isMemNoOffset(true); } @@ -871,8 +859,7 @@ public: return Val > -4096 && Val < 4096; } bool isAM2OffsetImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; // Immediate offset in range [-4095, 4095]. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; @@ -880,6 +867,11 @@ public: return Val > -4096 && Val < 4096; } bool isAddrMode3() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; if (!isMemory() || Memory.Alignment != 0) return false; // No shifts are legal for AM3. if (Memory.ShiftType != ARM_AM::no_shift) return false; @@ -906,7 +898,7 @@ public: // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else // and we reject it. - if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) + if (isImm() && !isa<MCConstantExpr>(getImm())) return true; if (!isMemory() || Memory.Alignment != 0) return false; // Check for register offset. @@ -992,6 +984,11 @@ public: return Val >= 0 && Val <= 1020 && (Val % 4) == 0; } bool isMemImm8s4Offset() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset a multiple of 4 in range [-1020, 1020]. @@ -1010,6 +1007,8 @@ public: bool isMemImm8Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; + // Base reg of PC isn't allowed for these encodings. + if (Memory.BaseRegNum == ARM::PC) return false; // Immediate offset in range [-255, 255]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); @@ -1026,6 +1025,8 @@ public: bool isMemNegImm8Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; + // Base reg of PC isn't allowed for these encodings. + if (Memory.BaseRegNum == ARM::PC) return false; // Immediate offset in range [-255, -1]. if (!Memory.OffsetImm) return false; int64_t Val = Memory.OffsetImm->getValue(); @@ -1043,7 +1044,7 @@ public: // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else // and we reject it. 
- if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) + if (isImm() && !isa<MCConstantExpr>(getImm())) return true; if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -1054,16 +1055,14 @@ public: return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); } bool isPostIdxImm8() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); return (Val > -256 && Val < 256) || (Val == INT32_MIN); } bool isPostIdxImm8s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); @@ -1086,9 +1085,10 @@ public: return VectorList.Count == 1; } - bool isVecListTwoD() const { + bool isVecListDPair() const { if (!isSingleSpacedVectorList()) return false; - return VectorList.Count == 2; + return (ARMMCRegisterClasses[ARM::DPairRegClassID] + .contains(VectorList.RegNum)); } bool isVecListThreeD() const { @@ -1106,46 +1106,159 @@ public: return VectorList.Count == 2; } + bool isVecListDPairSpaced() const { + if (!isSingleSpacedVectorList()) return false; + return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID] + .contains(VectorList.RegNum)); + } + + bool isVecListThreeQ() const { + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 3; + } + + bool isVecListFourQ() const { + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 4; + } + + bool isSingleSpacedVectorAllLanes() const { + return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorAllLanes() const { + return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced; + } bool isVecListOneDAllLanes() const { - if (Kind != k_VectorListAllLanes) return false; + if (!isSingleSpacedVectorAllLanes()) return false; return VectorList.Count == 1; } bool isVecListTwoDAllLanes() const { - if (Kind != k_VectorListAllLanes) return false; + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 2; + } + + bool isVecListTwoQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; return VectorList.Count == 2; } + bool isVecListThreeDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + + bool isVecListThreeQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + + bool isVecListFourDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + + bool isVecListFourQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + + bool isSingleSpacedVectorIndexed() const { + return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorIndexed() const { + return Kind == k_VectorListIndexed && VectorList.isDoubleSpaced; + } bool isVecListOneDByteIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 1 && VectorList.LaneIndex <= 7; } bool isVecListOneDHWordIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 1 && VectorList.LaneIndex <= 3; } bool isVecListOneDWordIndexed() const { - if (Kind != k_VectorListIndexed) return false; 
+ if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 1 && VectorList.LaneIndex <= 1; } bool isVecListTwoDByteIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 2 && VectorList.LaneIndex <= 7; } bool isVecListTwoDHWordIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 3; + } + + bool isVecListTwoQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 1; + } + + bool isVecListTwoQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; return VectorList.Count == 2 && VectorList.LaneIndex <= 3; } bool isVecListTwoDWordIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 2 && VectorList.LaneIndex <= 1; } + bool isVecListThreeDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 7; + } + + bool isVecListThreeDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 3; + } + + bool isVecListThreeQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 1; + } + + bool isVecListThreeQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 3; + } + + bool isVecListThreeDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 1; + } + + bool isVecListFourDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 7; + } + + bool isVecListFourDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + + bool isVecListFourQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -1160,8 +1273,7 @@ public: } bool isNEONi8splat() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1172,8 +1284,7 @@ public: } bool isNEONi16splat() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1183,8 +1294,7 @@ public: } bool isNEONi32splat() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. 
if (!CE) return false; @@ -1197,8 +1307,7 @@ public: } bool isNEONi32vmov() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1212,10 +1321,24 @@ public: (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) || (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff); } + bool isNEONi32vmovNeg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + int64_t Value = ~CE->getValue(); + // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, + // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. + return (Value >= 0 && Value < 256) || + (Value >= 0x0100 && Value <= 0xff00) || + (Value >= 0x010000 && Value <= 0xff0000) || + (Value >= 0x01000000 && Value <= 0xff000000) || + (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) || + (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff); + } bool isNEONi64splat() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1341,9 +1464,23 @@ public: addExpr(Inst, getImm()); } + void addFBits16Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(16 - CE->getValue())); + } + + void addFBits32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(32 - CE->getValue())); + } + void addFPImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getFPImm())); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); + Inst.addOperand(MCOperand::CreateImm(Val)); } void addImm8s4Operands(MCInst &Inst, unsigned N) const { @@ -1446,6 +1583,14 @@ public: Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); } + void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + int32_t Imm = Memory.OffsetImm->getValue(); + // FIXME: Handle #-0 + if (Imm == INT32_MIN) Imm = 0; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); @@ -1488,6 +1633,16 @@ public: void addAddrMode3Operands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + Inst.addOperand(MCOperand::CreateReg(0)); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; if (!Memory.OffsetRegNum) { ARM_AM::AddrOpc AddSub = Val < 0 ? 
ARM_AM::sub : ARM_AM::add; @@ -1551,6 +1706,15 @@ public: void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); Inst.addOperand(MCOperand::CreateImm(Val)); @@ -1582,7 +1746,7 @@ public: void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); // If this is an immediate, it's a label reference. - if (Kind == k_Immediate) { + if (isImm()) { addExpr(Inst, getImm()); Inst.addOperand(MCOperand::CreateImm(0)); return; @@ -1597,7 +1761,7 @@ public: void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); // If this is an immediate, it's a label reference. - if (Kind == k_Immediate) { + if (isImm()) { addExpr(Inst, getImm()); Inst.addOperand(MCOperand::CreateImm(0)); return; @@ -1796,6 +1960,20 @@ public: Inst.addOperand(MCOperand::CreateImm(Value)); } + void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = ~CE->getValue(); + if (Value >= 256 && Value <= 0xffff) + Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200); + else if (Value > 0xffff && Value <= 0xffffff) + Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400); + else if (Value > 0xffffff) + Value = (Value >> 24) | 0x600; + Inst.addOperand(MCOperand::CreateImm(Value)); + } + void addNEONi64splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. 
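The fixed-point conversion operands added earlier in this file (isFBits16/isFBits32 and the addFBits16Operands/addFBits32Operands hooks) range-check the written #fbits and then place the width-minus-fbits value on the MCInst. Condensed into two assumed free functions:

static unsigned encodeFBits16(int64_t FBits) { return 16 - FBits; } // #fbits in [0,16]
static unsigned encodeFBits32(int64_t FBits) { return 32 - FBits; } // #fbits in [1,32]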
@@ -1964,21 +2142,26 @@ public: } static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count, + bool isDoubleSpaced, SMLoc S, SMLoc E) { ARMOperand *Op = new ARMOperand(k_VectorListAllLanes); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; Op->StartLoc = S; Op->EndLoc = E; return Op; } static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count, - unsigned Index, SMLoc S, SMLoc E) { + unsigned Index, + bool isDoubleSpaced, + SMLoc S, SMLoc E) { ARMOperand *Op = new ARMOperand(k_VectorListIndexed); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.LaneIndex = Index; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -2001,14 +2184,6 @@ public: return Op; } - static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { - ARMOperand *Op = new ARMOperand(k_FPImmediate); - Op->FPImm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - static ARMOperand *CreateMem(unsigned BaseRegNum, const MCConstantExpr *OffsetImm, unsigned OffsetRegNum, @@ -2073,10 +2248,6 @@ public: void ARMOperand::print(raw_ostream &OS) const { switch (Kind) { - case k_FPImmediate: - OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm()) - << ") >"; - break; case k_CondCode: OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">"; break; @@ -2245,9 +2416,10 @@ int ARMAsmParser::tryParseRegister() { .Default(0); } if (!RegNum) { - // Check for aliases registered via .req. - StringMap<unsigned>::const_iterator Entry = - RegisterReqs.find(Tok.getIdentifier()); + // Check for aliases registered via .req. Canonicalize to lower case. + // That's more consistent since register names are case insensitive, and + // it's how the original entry was passed in from MC/MCParser/AsmParser. + StringMap<unsigned>::const_iterator Entry = RegisterReqs.find(lowerCase); // If no match, return failure. if (Entry == RegisterReqs.end()) return -1; @@ -2327,6 +2499,10 @@ int ARMAsmParser::tryParseShiftRegister( Error(ImmLoc, "immediate shift value out of range"); return -1; } + // shift by zero is a nop. Always send it through as lsl. + // ('as' compatibility) + if (Imm == 0) + ShiftTy = ARM_AM::lsl; } else if (Parser.getTok().is(AsmToken::Identifier)) { ShiftReg = tryParseRegister(); SMLoc L = Parser.getTok().getLoc(); @@ -2385,18 +2561,14 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const MCExpr *ImmVal; if (getParser().ParseExpression(ImmVal)) - return MatchOperand_ParseFail; + return true; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return MatchOperand_ParseFail; - } + if (!MCE) + return TokError("immediate value expected for vector index"); SMLoc E = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return MatchOperand_ParseFail; - } + if (Parser.getTok().isNot(AsmToken::RBrac)) + return Error(E, "']' expected"); Parser.Lex(); // Eat right bracket token. @@ -2415,7 +2587,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { // Use the same layout as the tablegen'erated register name matcher. Ugly, // but efficient. 
switch (Name.size()) { - default: break; + default: return -1; case 2: if (Name[0] != CoprocOp) return -1; @@ -2432,7 +2604,6 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { case '8': return 8; case '9': return 9; } - break; case 3: if (Name[0] != CoprocOp || Name[1] != '1') return -1; @@ -2445,10 +2616,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { case '4': return 14; case '5': return 15; } - break; } - - return -1; } /// parseITCondCode - Try to parse a condition code for an IT instruction. @@ -2568,7 +2736,7 @@ static unsigned getNextRegister(unsigned Reg) { if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) return Reg + 1; switch(Reg) { - default: assert(0 && "Invalid GPR number!"); + default: llvm_unreachable("Invalid GPR number!"); case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2; case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4; case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6; @@ -2737,21 +2905,32 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { Parser.Lex(); // Eat the ']'. return MatchOperand_Success; } - if (Parser.getTok().is(AsmToken::Integer)) { - int64_t Val = Parser.getTok().getIntVal(); - // Make this range check context sensitive for .8, .16, .32. - if (Val < 0 && Val > 7) - Error(Parser.getTok().getLoc(), "lane index out of range"); - Index = Val; - LaneKind = IndexedLane; - Parser.Lex(); // Eat the token; - if (Parser.getTok().isNot(AsmToken::RBrac)) - Error(Parser.getTok().getLoc(), "']' expected"); - Parser.Lex(); // Eat the ']'. - return MatchOperand_Success; + const MCExpr *LaneIndex; + SMLoc Loc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(LaneIndex)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; } - Error(Parser.getTok().getLoc(), "lane index must be empty or an integer"); - return MatchOperand_ParseFail; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LaneIndex); + if (!CE) { + Error(Loc, "lane index must be empty or an integer"); + return MatchOperand_ParseFail; + } + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(Parser.getTok().getLoc(), "']' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the ']'. + int64_t Val = CE->getValue(); + + // FIXME: Make this range check context sensitive for .8, .16, .32. 
+ if (Val < 0 || Val > 7) { + Error(Parser.getTok().getLoc(), "lane index out of range"); + return MatchOperand_ParseFail; + } + Index = Val; + LaneKind = IndexedLane; + return MatchOperand_Success; } LaneKind = NoLanes; return MatchOperand_Success; @@ -2776,19 +2955,19 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (Res != MatchOperand_Success) return Res; switch (LaneKind) { - default: - assert(0 && "unexpected lane kind!"); case NoLanes: E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E)); break; case AllLanes: E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E)); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false, + S, E)); break; case IndexedLane: Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1, - LaneIndex, S,E)); + LaneIndex, + false, S, E)); break; } return MatchOperand_Success; @@ -2799,19 +2978,22 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (Res != MatchOperand_Success) return Res; switch (LaneKind) { - default: - assert(0 && "unexpected lane kind!"); case NoLanes: E = Parser.getTok().getLoc(); + Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, + &ARMMCRegisterClasses[ARM::DPairRegClassID]); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); break; case AllLanes: E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E)); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false, + S, E)); break; case IndexedLane: Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2, - LaneIndex, S,E)); + LaneIndex, + false, S, E)); break; } return MatchOperand_Success; @@ -2959,11 +3141,6 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(EndLoc, "mismatched lane index in register list"); return MatchOperand_ParseFail; } - if (Spacing == 2 && LaneKind != NoLanes) { - Error(EndLoc, - "lane index specfier invalid in double spaced register list"); - return MatchOperand_ParseFail; - } } SMLoc E = Parser.getTok().getLoc(); @@ -2974,19 +3151,29 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat '}' token. switch (LaneKind) { - default: - assert(0 && "unexpected lane kind in register list."); case NoLanes: + // Non-lane two-register operands have been converted to the + // composite register classes. + if (Count == 2) { + const MCRegisterClass *RC = (Spacing == 1) ? + &ARMMCRegisterClasses[ARM::DPairRegClassID] : + &ARMMCRegisterClasses[ARM::DPairSpcRegClassID]; + FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC); + } + Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, (Spacing == 2), S, E)); break; case AllLanes: Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count, + (Spacing == 2), S, E)); break; case IndexedLane: Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count, - LaneIndex, S, E)); + LaneIndex, + (Spacing == 2), + S, E)); break; } return MatchOperand_Success; @@ -3082,14 +3269,14 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { .Case("faultmask", 19) .Case("control", 20) .Default(~0U); - + if (FlagsVal == ~0U) return MatchOperand_NoMatch; if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19) // basepri, basepri_max and faultmask only valid for V7m. return MatchOperand_NoMatch; - + Parser.Lex(); // Eat identifier token. 
Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S)); return MatchOperand_Success; @@ -3954,7 +4141,10 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { unsigned Align = 0; switch (CE->getValue()) { default: - return Error(E, "alignment specifier must be 64, 128, or 256 bits"); + return Error(E, + "alignment specifier must be 16, 32, 64, 128, or 256 bits"); + case 16: Align = 2; break; + case 32: Align = 4; break; case 64: Align = 8; break; case 128: Align = 16; break; case 256: Align = 32; break; @@ -4135,6 +4325,15 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, /// parseFPImm - A floating point immediate expression operand. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Anything that can accept a floating point constant as an operand + // needs to go through here, as the regular ParseExpression is + // integer only. + // + // This routine still creates a generic Immediate operand, containing + // a bitcast of the 64-bit floating point value. The various operands + // that accept floats can check whether the value is valid for them + // via the standard is*() predicates. + SMLoc S = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::Hash) && @@ -4165,34 +4364,39 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); } const AsmToken &Tok = Parser.getTok(); + SMLoc Loc = Tok.getLoc(); if (Tok.is(AsmToken::Real)) { - APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + APFloat RealVal(APFloat::IEEEsingle, Tok.getString()); uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; - int Val = ARM_AM::getFP64Imm(APInt(64, IntVal)); + IntVal ^= (uint64_t)isNegative << 31; Parser.Lex(); // Eat the token. - if (Val == -1) { - TokError("floating point value out of range"); - return MatchOperand_ParseFail; - } - Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + Operands.push_back(ARMOperand::CreateImm( + MCConstantExpr::Create(IntVal, getContext()), + S, Parser.getTok().getLoc())); return MatchOperand_Success; } + // Also handle plain integers. Instructions which allow floating point + // immediates also allow a raw encoded 8-bit value. if (Tok.is(AsmToken::Integer)) { int64_t Val = Tok.getIntVal(); Parser.Lex(); // Eat the token. if (Val > 255 || Val < 0) { - TokError("encoded floating point value out of range"); + Error(Loc, "encoded floating point value out of range"); return MatchOperand_ParseFail; } - Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + double RealVal = ARM_AM::getFPImmFloat(Val); + Val = APFloat(APFloat::IEEEdouble, RealVal).bitcastToAPInt().getZExtValue(); + Operands.push_back(ARMOperand::CreateImm( + MCConstantExpr::Create(Val, getContext()), S, + Parser.getTok().getLoc())); return MatchOperand_Success; } - TokError("invalid floating point immediate"); + Error(Loc, "invalid floating point immediate"); return MatchOperand_ParseFail; } + /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, @@ -4215,7 +4419,6 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - // If this is VMRS, check for the apsr_nzcv operand. 
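The parseFPImm rework above keeps floating-point literals as generic immediates: a real literal is parsed as an IEEE single and its sign applied by toggling bit 31, while a plain integer is treated as the 8-bit encoded form, expanded through ARM_AM::getFPImmFloat and bitcast as a double. A small sketch of the literal path, using the same APFloat calls the patch itself uses (function name hypothetical):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/StringRef.h"
    #include <cstdint>

    // Illustrative only: the bit pattern carried in the immediate operand for
    // a literal such as "#-0.5", where the leading '-' was already consumed.
    static uint64_t fpImmBits(llvm::StringRef Digits, bool isNegative) {
      llvm::APFloat RealVal(llvm::APFloat::IEEEsingle, Digits);
      uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
      IntVal ^= (uint64_t)isNegative << 31;
      return IntVal;
    }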
if (!tryParseRegisterWithWriteBack(Operands)) return false; int Res = tryParseShiftRegister(Operands); @@ -4223,6 +4426,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return false; else if (Res == -1) // irrecoverable error return true; + // If this is VMRS, check for the apsr_nzcv operand. if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") { S = Parser.getTok().getLoc(); Parser.Lex(); @@ -4349,7 +4553,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || - Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal") + Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || + Mnemonic == "fmuls") return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -4392,7 +4597,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" || Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" || Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || - Mnemonic == "fsts" || + Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || + Mnemonic == "fmuls" || Mnemonic == "fcmps" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4521,9 +4727,11 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // // If either register is a high reg, it's either one of the SP // variants (handled above) or a 32-bit encoding, so we just - // check against T3. + // check against T3. If the second register is the PC, this is an + // alternate form of ADR, which uses encoding T4, so check for that too. if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) && + static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC && static_cast<ARMOperand*>(Operands[5])->isT2SOImm()) return false; // If both registers are low, we're in an IT block, and the immediate is @@ -4891,10 +5099,11 @@ validateInstruction(MCInst &Inst, const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); // Check the IT block state first. - // NOTE: In Thumb mode, the BKPT instruction has the interesting property of - // being allowed in IT blocks, but not being predicable. It just always + // NOTE: BKPT instruction has the interesting property of being + // allowed in IT blocks, but not being predicable. It just always // executes. 
- if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) { + if (inITBlock() && Inst.getOpcode() != ARM::tBKPT && + Inst.getOpcode() != ARM::BKPT) { unsigned bit = 1; if (ITState.FirstCond) ITState.FirstCond = false; @@ -5034,165 +5243,259 @@ validateInstruction(MCInst &Inst, return false; } -static unsigned getRealVSTLNOpcode(unsigned Opc) { +static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) { switch(Opc) { - default: assert(0 && "unexpected opcode!"); + default: llvm_unreachable("unexpected opcode!"); // VST1LN - case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: - case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: - case ARM::VST1LNdWB_fixed_Asm_U8: - return ARM::VST1LNd8_UPD; - case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16: - case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16: - case ARM::VST1LNdWB_fixed_Asm_U16: - return ARM::VST1LNd16_UPD; - case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: - case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: - case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: - return ARM::VST1LNd32_UPD; - case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: - case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: - case ARM::VST1LNdWB_register_Asm_U8: - return ARM::VST1LNd8_UPD; - case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16: - case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16: - case ARM::VST1LNdWB_register_Asm_U16: - return ARM::VST1LNd16_UPD; - case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: - case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: - case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: - return ARM::VST1LNd32_UPD; - case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: - case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8: - case ARM::VST1LNdAsm_U8: - return ARM::VST1LNd8; - case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16: - case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: - case ARM::VST1LNdAsm_U16: - return ARM::VST1LNd16; - case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: - case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: - case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32: - return ARM::VST1LNd32; + case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; + case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; + case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8; + case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16; + case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32; // VST2LN - case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: - case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: - case ARM::VST2LNdWB_fixed_Asm_U8: - return ARM::VST2LNd8_UPD; - case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16: - case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16: - case ARM::VST2LNdWB_fixed_Asm_U16: - return ARM::VST2LNd16_UPD; - case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: - case 
ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: - case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: - return ARM::VST2LNd32_UPD; - case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: - case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: - case ARM::VST2LNdWB_register_Asm_U8: - return ARM::VST2LNd8_UPD; - case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16: - case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16: - case ARM::VST2LNdWB_register_Asm_U16: - return ARM::VST2LNd16_UPD; - case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: - case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32: - case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: - return ARM::VST2LNd32_UPD; - case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: - case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8: - case ARM::VST2LNdAsm_U8: - return ARM::VST2LNd8; - case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16: - case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: - case ARM::VST2LNdAsm_U16: - return ARM::VST2LNd16; - case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: - case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: - case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32: - return ARM::VST2LNd32; + case ARM::VST2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD; + case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD; + case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD; + + case ARM::VST2LNdWB_register_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD; + case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD; + case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD; + + case ARM::VST2LNdAsm_8: Spacing = 1; return ARM::VST2LNd8; + case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16; + case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32; + case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16; + case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32; + + // VST3LN + case ARM::VST3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD; + case ARM::VST3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD; + case ARM::VST3LNdWB_register_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD; + case ARM::VST3LNdWB_register_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_register_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_register_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_register_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD; + case ARM::VST3LNdAsm_8: Spacing = 1; return ARM::VST3LNd8; + case ARM::VST3LNdAsm_16: Spacing = 1; return ARM::VST3LNd16; + case ARM::VST3LNdAsm_32: Spacing = 1; return ARM::VST3LNd32; + case ARM::VST3LNqAsm_16: Spacing = 2; return ARM::VST3LNq16; + case ARM::VST3LNqAsm_32: Spacing = 2; return ARM::VST3LNq32; + 
+ // VST3 + case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; return ARM::VST3d8_UPD; + case ARM::VST3dWB_fixed_Asm_16: Spacing = 1; return ARM::VST3d16_UPD; + case ARM::VST3dWB_fixed_Asm_32: Spacing = 1; return ARM::VST3d32_UPD; + case ARM::VST3qWB_fixed_Asm_8: Spacing = 2; return ARM::VST3q8_UPD; + case ARM::VST3qWB_fixed_Asm_16: Spacing = 2; return ARM::VST3q16_UPD; + case ARM::VST3qWB_fixed_Asm_32: Spacing = 2; return ARM::VST3q32_UPD; + case ARM::VST3dWB_register_Asm_8: Spacing = 1; return ARM::VST3d8_UPD; + case ARM::VST3dWB_register_Asm_16: Spacing = 1; return ARM::VST3d16_UPD; + case ARM::VST3dWB_register_Asm_32: Spacing = 1; return ARM::VST3d32_UPD; + case ARM::VST3qWB_register_Asm_8: Spacing = 2; return ARM::VST3q8_UPD; + case ARM::VST3qWB_register_Asm_16: Spacing = 2; return ARM::VST3q16_UPD; + case ARM::VST3qWB_register_Asm_32: Spacing = 2; return ARM::VST3q32_UPD; + case ARM::VST3dAsm_8: Spacing = 1; return ARM::VST3d8; + case ARM::VST3dAsm_16: Spacing = 1; return ARM::VST3d16; + case ARM::VST3dAsm_32: Spacing = 1; return ARM::VST3d32; + case ARM::VST3qAsm_8: Spacing = 2; return ARM::VST3q8; + case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16; + case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32; + + // VST4LN + case ARM::VST4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD; + case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD; + case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD; + case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNq16_UPD; + case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD; + case ARM::VST4LNdWB_register_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD; + case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD; + case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD; + case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD; + case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD; + case ARM::VST4LNdAsm_8: Spacing = 1; return ARM::VST4LNd8; + case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16; + case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32; + case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16; + case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32; + + // VST4 + case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD; + case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD; + case ARM::VST4dWB_fixed_Asm_32: Spacing = 1; return ARM::VST4d32_UPD; + case ARM::VST4qWB_fixed_Asm_8: Spacing = 2; return ARM::VST4q8_UPD; + case ARM::VST4qWB_fixed_Asm_16: Spacing = 2; return ARM::VST4q16_UPD; + case ARM::VST4qWB_fixed_Asm_32: Spacing = 2; return ARM::VST4q32_UPD; + case ARM::VST4dWB_register_Asm_8: Spacing = 1; return ARM::VST4d8_UPD; + case ARM::VST4dWB_register_Asm_16: Spacing = 1; return ARM::VST4d16_UPD; + case ARM::VST4dWB_register_Asm_32: Spacing = 1; return ARM::VST4d32_UPD; + case ARM::VST4qWB_register_Asm_8: Spacing = 2; return ARM::VST4q8_UPD; + case ARM::VST4qWB_register_Asm_16: Spacing = 2; return ARM::VST4q16_UPD; + case ARM::VST4qWB_register_Asm_32: Spacing = 2; return ARM::VST4q32_UPD; + case ARM::VST4dAsm_8: Spacing = 1; return ARM::VST4d8; + case ARM::VST4dAsm_16: Spacing = 1; return ARM::VST4d16; + case ARM::VST4dAsm_32: Spacing = 1; return ARM::VST4d32; + case ARM::VST4qAsm_8: Spacing = 2; return ARM::VST4q8; + case ARM::VST4qAsm_16: Spacing = 2; return ARM::VST4q16; + case ARM::VST4qAsm_32: Spacing = 2; return 
ARM::VST4q32; } } -static unsigned getRealVLDLNOpcode(unsigned Opc) { +static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { switch(Opc) { - default: assert(0 && "unexpected opcode!"); + default: llvm_unreachable("unexpected opcode!"); // VLD1LN - case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: - case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: - case ARM::VLD1LNdWB_fixed_Asm_U8: - return ARM::VLD1LNd8_UPD; - case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16: - case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16: - case ARM::VLD1LNdWB_fixed_Asm_U16: - return ARM::VLD1LNd16_UPD; - case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: - case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: - case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: - return ARM::VLD1LNd32_UPD; - case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: - case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: - case ARM::VLD1LNdWB_register_Asm_U8: - return ARM::VLD1LNd8_UPD; - case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16: - case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16: - case ARM::VLD1LNdWB_register_Asm_U16: - return ARM::VLD1LNd16_UPD; - case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: - case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: - case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: - return ARM::VLD1LNd32_UPD; - case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: - case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8: - case ARM::VLD1LNdAsm_U8: - return ARM::VLD1LNd8; - case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16: - case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: - case ARM::VLD1LNdAsm_U16: - return ARM::VLD1LNd16; - case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: - case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: - case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32: - return ARM::VLD1LNd32; + case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8; + case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16; + case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32; // VLD2LN - case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: - case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: - case ARM::VLD2LNdWB_fixed_Asm_U8: - return ARM::VLD2LNd8_UPD; - case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16: - case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16: - case ARM::VLD2LNdWB_fixed_Asm_U16: - return ARM::VLD2LNd16_UPD; - case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: - case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: - case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: - return ARM::VLD2LNd32_UPD; - case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: - case ARM::VLD2LNdWB_register_Asm_I8: case 
ARM::VLD2LNdWB_register_Asm_S8: - case ARM::VLD2LNdWB_register_Asm_U8: - return ARM::VLD2LNd8_UPD; - case ARM::VLD2LNdWB_register_Asm_16: case ARM::VLD2LNdWB_register_Asm_P16: - case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16: - case ARM::VLD2LNdWB_register_Asm_U16: - return ARM::VLD2LNd16_UPD; - case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: - case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: - case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: - return ARM::VLD2LNd32_UPD; - case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: - case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8: - case ARM::VLD2LNdAsm_U8: - return ARM::VLD2LNd8; - case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16: - case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: - case ARM::VLD2LNdAsm_U16: - return ARM::VLD2LNd16; - case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: - case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: - case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32: - return ARM::VLD2LNd32; + case ARM::VLD2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNq16_UPD; + case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD; + case ARM::VLD2LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD; + case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD; + case ARM::VLD2LNdAsm_8: Spacing = 1; return ARM::VLD2LNd8; + case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16; + case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32; + case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16; + case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32; + + // VLD3DUP + case ARM::VLD3DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; + case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; + case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; + case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; + case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; + case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; + case ARM::VLD3DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD; + case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; + case ARM::VLD3DUPdAsm_8: Spacing = 1; return ARM::VLD3DUPd8; + case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16; + case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32; + case ARM::VLD3DUPqAsm_8: Spacing = 2; return ARM::VLD3DUPq8; + case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16; + case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32; + + // VLD3LN + case 
ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD; + case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD; + case ARM::VLD3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD; + case ARM::VLD3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNq16_UPD; + case ARM::VLD3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD; + case ARM::VLD3LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD; + case ARM::VLD3LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD; + case ARM::VLD3LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD; + case ARM::VLD3LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD; + case ARM::VLD3LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD; + case ARM::VLD3LNdAsm_8: Spacing = 1; return ARM::VLD3LNd8; + case ARM::VLD3LNdAsm_16: Spacing = 1; return ARM::VLD3LNd16; + case ARM::VLD3LNdAsm_32: Spacing = 1; return ARM::VLD3LNd32; + case ARM::VLD3LNqAsm_16: Spacing = 2; return ARM::VLD3LNq16; + case ARM::VLD3LNqAsm_32: Spacing = 2; return ARM::VLD3LNq32; + + // VLD3 + case ARM::VLD3dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD; + case ARM::VLD3dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD; + case ARM::VLD3dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD; + case ARM::VLD3qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD; + case ARM::VLD3qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD; + case ARM::VLD3qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD; + case ARM::VLD3dWB_register_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD; + case ARM::VLD3dWB_register_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD; + case ARM::VLD3dWB_register_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD; + case ARM::VLD3qWB_register_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD; + case ARM::VLD3qWB_register_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD; + case ARM::VLD3qWB_register_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD; + case ARM::VLD3dAsm_8: Spacing = 1; return ARM::VLD3d8; + case ARM::VLD3dAsm_16: Spacing = 1; return ARM::VLD3d16; + case ARM::VLD3dAsm_32: Spacing = 1; return ARM::VLD3d32; + case ARM::VLD3qAsm_8: Spacing = 2; return ARM::VLD3q8; + case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16; + case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32; + + // VLD4LN + case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; + case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD; + case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD; + case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNq16_UPD; + case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD; + case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; + case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD; + case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD; + case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD; + case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD; + case ARM::VLD4LNdAsm_8: Spacing = 1; return ARM::VLD4LNd8; + case ARM::VLD4LNdAsm_16: Spacing = 1; return ARM::VLD4LNd16; + case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32; + case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16; + case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32; + + // VLD4DUP + case ARM::VLD4DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD; + case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; 
return ARM::VLD4DUPd16_UPD; + case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPq8_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPq16_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD; + case ARM::VLD4DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD; + case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD; + case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD; + case ARM::VLD4DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD; + case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD; + case ARM::VLD4DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD; + case ARM::VLD4DUPdAsm_8: Spacing = 1; return ARM::VLD4DUPd8; + case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16; + case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32; + case ARM::VLD4DUPqAsm_8: Spacing = 2; return ARM::VLD4DUPq8; + case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16; + case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32; + + // VLD4 + case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; + case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; + case ARM::VLD4dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD; + case ARM::VLD4qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD; + case ARM::VLD4qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD; + case ARM::VLD4qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD; + case ARM::VLD4dWB_register_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; + case ARM::VLD4dWB_register_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; + case ARM::VLD4dWB_register_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD; + case ARM::VLD4qWB_register_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD; + case ARM::VLD4qWB_register_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD; + case ARM::VLD4qWB_register_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD; + case ARM::VLD4dAsm_8: Spacing = 1; return ARM::VLD4d8; + case ARM::VLD4dAsm_16: Spacing = 1; return ARM::VLD4d16; + case ARM::VLD4dAsm_32: Spacing = 1; return ARM::VLD4d32; + case ARM::VLD4qAsm_8: Spacing = 2; return ARM::VLD4q8; + case ARM::VLD4qAsm_16: Spacing = 2; return ARM::VLD4q16; + case ARM::VLD4qAsm_32: Spacing = 2; return ARM::VLD4q32; } } @@ -5200,24 +5503,86 @@ bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { + // Aliases for alternate PC+imm syntax of LDR instructions. + case ARM::t2LDRpcrel: + Inst.setOpcode(ARM::t2LDRpci); + return true; + case ARM::t2LDRBpcrel: + Inst.setOpcode(ARM::t2LDRBpci); + return true; + case ARM::t2LDRHpcrel: + Inst.setOpcode(ARM::t2LDRHpci); + return true; + case ARM::t2LDRSBpcrel: + Inst.setOpcode(ARM::t2LDRSBpci); + return true; + case ARM::t2LDRSHpcrel: + Inst.setOpcode(ARM::t2LDRSHpci); + return true; // Handle NEON VST complex aliases. 
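Each of the VST/VLD alias cases that follow does the same three things: translate the parser-only pseudo-opcode into the real instruction plus a register spacing (getRealVSTOpcode / getRealVLDOpcode above), copy the base register, alignment, Rm and predicate operands across, and synthesize the rest of the register list at Vd + Spacing, Vd + 2 * Spacing, and so on. A condensed sketch of that last step (helper name hypothetical, not part of the parser):

    #include "llvm/MC/MCInst.h"

    // Illustrative only: append the registers of a possibly double-spaced
    // D-register list, e.g. {d0,d1,d2} for Spacing == 1 or {d0,d2,d4} for
    // Spacing == 2, relying on consecutive D registers having consecutive
    // enum values, just as the expansions below do with getReg() + Spacing.
    static void addSpacedRegList(llvm::MCInst &TmpInst, unsigned FirstReg,
                                 unsigned NumRegs, unsigned Spacing) {
      for (unsigned i = 0; i != NumRegs; ++i)
        TmpInst.addOperand(llvm::MCOperand::CreateReg(FirstReg + i * Spacing));
    }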
- case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: - case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: - case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16: - case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16: - case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16: - case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: - case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: - case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: { + case ARM::VST1LNdWB_register_Asm_8: + case ARM::VST1LNdWB_register_Asm_16: + case ARM::VST1LNdWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_register_Asm_8: + case ARM::VST2LNdWB_register_Asm_16: + case ARM::VST2LNdWB_register_Asm_32: + case ARM::VST2LNqWB_register_Asm_16: + case ARM::VST2LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST3LNdWB_register_Asm_8: + case ARM::VST3LNdWB_register_Asm_16: + case ARM::VST3LNdWB_register_Asm_32: + case ARM::VST3LNqWB_register_Asm_16: + case ARM::VST3LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
- TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(4)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(5)); // CondCode TmpInst.addOperand(Inst.getOperand(6)); @@ -5225,42 +5590,42 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: - case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: - case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16: - case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16: - case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16: - case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: - case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32: - case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: { + case ARM::VST4LNdWB_register_Asm_8: + case ARM::VST4LNdWB_register_Asm_16: + case ARM::VST4LNdWB_register_Asm_32: + case ARM::VST4LNqWB_register_Asm_16: + case ARM::VST4LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(4)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(5)); // CondCode TmpInst.addOperand(Inst.getOperand(6)); Inst = TmpInst; return true; } - case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: - case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: - case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16: - case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16: - case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16: - case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: - case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: - case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: { + + case ARM::VST1LNdWB_fixed_Asm_8: + case ARM::VST1LNdWB_fixed_Asm_16: + case ARM::VST1LNdWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
- TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment @@ -5273,43 +5638,49 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: - case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: - case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16: - case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16: - case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16: - case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: - case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: - case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: { + case ARM::VST2LNdWB_fixed_Asm_8: + case ARM::VST2LNdWB_fixed_Asm_16: + case ARM::VST2LNdWB_fixed_Asm_32: + case ARM::VST2LNqWB_fixed_Asm_16: + case ARM::VST2LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); Inst = TmpInst; return true; } - case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8: - case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16: - case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: - case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: - case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32: - case ARM::VST1LNdAsm_U32: { + + case ARM::VST3LNdWB_fixed_Asm_8: + case ARM::VST3LNdWB_fixed_Asm_16: + case ARM::VST3LNdWB_fixed_Asm_32: + case ARM::VST3LNqWB_fixed_Asm_16: + case ARM::VST3LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
- TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); @@ -5317,45 +5688,166 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8: - case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16: - case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: - case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: - case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32: - case ARM::VST2LNdAsm_U32: { + case ARM::VST4LNdWB_fixed_Asm_8: + case ARM::VST4LNdWB_fixed_Asm_16: + case ARM::VST4LNdWB_fixed_Asm_32: + case ARM::VST4LNqWB_fixed_Asm_16: + case ARM::VST4LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); Inst = TmpInst; return true; } + + case ARM::VST1LNdAsm_8: + case ARM::VST1LNdAsm_16: + case ARM::VST1LNdAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdAsm_8: + case ARM::VST2LNdAsm_16: + case ARM::VST2LNdAsm_32: + case ARM::VST2LNqAsm_16: + case ARM::VST2LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST3LNdAsm_8: + case ARM::VST3LNdAsm_16: + case ARM::VST3LNdAsm_32: + case ARM::VST3LNqAsm_16: + case ARM::VST3LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST4LNdAsm_8: + case ARM::VST4LNdAsm_16: + case ARM::VST4LNdAsm_32: + case ARM::VST4LNqAsm_16: + case ARM::VST4LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle NEON VLD complex aliases. - case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: - case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: - case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16: - case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16: - case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16: - case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: - case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: - case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: { + case ARM::VLD1LNdWB_register_Asm_8: + case ARM::VLD1LNdWB_register_Asm_16: + case ARM::VLD1LNdWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_register_Asm_8: + case ARM::VLD2LNdWB_register_Asm_16: + case ARM::VLD2LNdWB_register_Asm_32: + case ARM::VLD2LNqWB_register_Asm_16: + case ARM::VLD2LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(4)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(5)); // CondCode TmpInst.addOperand(Inst.getOperand(6)); @@ -5363,26 +5855,30 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: - case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: - case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16: - case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16: - case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16: - case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: - case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: - case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: { + case ARM::VLD3LNdWB_register_Asm_8: + case ARM::VLD3LNdWB_register_Asm_16: + case ARM::VLD3LNdWB_register_Asm_32: + case ARM::VLD3LNqWB_register_Asm_16: + case ARM::VLD3LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
- TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(4)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(5)); // CondCode TmpInst.addOperand(Inst.getOperand(6)); @@ -5390,18 +5886,49 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: - case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: - case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16: - case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16: - case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16: - case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: - case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: - case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: { + case ARM::VLD4LNdWB_register_Asm_8: + case ARM::VLD4LNdWB_register_Asm_16: + case ARM::VLD4LNdWB_register_Asm_32: + case ARM::VLD4LNqWB_register_Asm_16: + case ARM::VLD4LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdWB_fixed_Asm_8: + case ARM::VLD1LNdWB_fixed_Asm_16: + case ARM::VLD1LNdWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn @@ -5415,26 +5942,111 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: - case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: - case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16: - case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16: - case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16: - case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: - case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: - case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: { + case ARM::VLD2LNdWB_fixed_Asm_8: + case ARM::VLD2LNdWB_fixed_Asm_16: + case ARM::VLD2LNdWB_fixed_Asm_32: + case ARM::VLD2LNqWB_fixed_Asm_16: + case ARM::VLD2LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3LNdWB_fixed_Asm_8: + case ARM::VLD3LNdWB_fixed_Asm_16: + case ARM::VLD3LNdWB_fixed_Asm_32: + case ARM::VLD3LNqWB_fixed_Asm_16: + case ARM::VLD3LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4LNdWB_fixed_Asm_8: + case ARM::VLD4LNdWB_fixed_Asm_16: + case ARM::VLD4LNdWB_fixed_Asm_32: + case ARM::VLD4LNqWB_fixed_Asm_16: + case ARM::VLD4LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdAsm_8: + case ARM::VLD1LNdAsm_16: + case ARM::VLD1LNdAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); @@ -5442,20 +6054,24 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8: - case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16: - case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: - case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: - case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32: - case ARM::VLD1LNdAsm_U32: { + case ARM::VLD2LNdAsm_8: + case ARM::VLD2LNdAsm_16: + case ARM::VLD2LNdAsm_32: + case ARM::VLD2LNqAsm_16: + case ARM::VLD2LNqAsm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); @@ -5463,29 +6079,549 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8: - case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16: - case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: - case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: - case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32: - case ARM::VLD2LNdAsm_U32: { + case ARM::VLD3LNdAsm_8: + case ARM::VLD3LNdAsm_16: + case ARM::VLD3LNdAsm_32: + case ARM::VLD3LNqAsm_16: + case ARM::VLD3LNqAsm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
- TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); Inst = TmpInst; return true; } + + case ARM::VLD4LNdAsm_8: + case ARM::VLD4LNdAsm_16: + case ARM::VLD4LNdAsm_32: + case ARM::VLD4LNqAsm_16: + case ARM::VLD4LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD3DUP single 3-element structure to all lanes instructions. 
+ case ARM::VLD3DUPdAsm_8: + case ARM::VLD3DUPdAsm_16: + case ARM::VLD3DUPdAsm_32: + case ARM::VLD3DUPqAsm_8: + case ARM::VLD3DUPqAsm_16: + case ARM::VLD3DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_fixed_Asm_8: + case ARM::VLD3DUPdWB_fixed_Asm_16: + case ARM::VLD3DUPdWB_fixed_Asm_32: + case ARM::VLD3DUPqWB_fixed_Asm_8: + case ARM::VLD3DUPqWB_fixed_Asm_16: + case ARM::VLD3DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_register_Asm_8: + case ARM::VLD3DUPdWB_register_Asm_16: + case ARM::VLD3DUPdWB_register_Asm_32: + case ARM::VLD3DUPqWB_register_Asm_8: + case ARM::VLD3DUPqWB_register_Asm_16: + case ARM::VLD3DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD3 multiple 3-element structure instructions. 
+ case ARM::VLD3dAsm_8: + case ARM::VLD3dAsm_16: + case ARM::VLD3dAsm_32: + case ARM::VLD3qAsm_8: + case ARM::VLD3qAsm_16: + case ARM::VLD3qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3dWB_fixed_Asm_8: + case ARM::VLD3dWB_fixed_Asm_16: + case ARM::VLD3dWB_fixed_Asm_32: + case ARM::VLD3qWB_fixed_Asm_8: + case ARM::VLD3qWB_fixed_Asm_16: + case ARM::VLD3qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3dWB_register_Asm_8: + case ARM::VLD3dWB_register_Asm_16: + case ARM::VLD3dWB_register_Asm_32: + case ARM::VLD3qWB_register_Asm_8: + case ARM::VLD3qWB_register_Asm_16: + case ARM::VLD3qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD4DUP single 4-element structure to all lanes instructions.
+ case ARM::VLD4DUPdAsm_8: + case ARM::VLD4DUPdAsm_16: + case ARM::VLD4DUPdAsm_32: + case ARM::VLD4DUPqAsm_8: + case ARM::VLD4DUPqAsm_16: + case ARM::VLD4DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_fixed_Asm_8: + case ARM::VLD4DUPdWB_fixed_Asm_16: + case ARM::VLD4DUPdWB_fixed_Asm_32: + case ARM::VLD4DUPqWB_fixed_Asm_8: + case ARM::VLD4DUPqWB_fixed_Asm_16: + case ARM::VLD4DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_register_Asm_8: + case ARM::VLD4DUPdWB_register_Asm_16: + case ARM::VLD4DUPdWB_register_Asm_32: + case ARM::VLD4DUPqWB_register_Asm_8: + case ARM::VLD4DUPqWB_register_Asm_16: + case ARM::VLD4DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD4 multiple 4-element structure instructions. 
+ case ARM::VLD4dAsm_8: + case ARM::VLD4dAsm_16: + case ARM::VLD4dAsm_32: + case ARM::VLD4qAsm_8: + case ARM::VLD4qAsm_16: + case ARM::VLD4qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4dWB_fixed_Asm_8: + case ARM::VLD4dWB_fixed_Asm_16: + case ARM::VLD4dWB_fixed_Asm_32: + case ARM::VLD4qWB_fixed_Asm_8: + case ARM::VLD4qWB_fixed_Asm_16: + case ARM::VLD4qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4dWB_register_Asm_8: + case ARM::VLD4dWB_register_Asm_16: + case ARM::VLD4dWB_register_Asm_32: + case ARM::VLD4qWB_register_Asm_8: + case ARM::VLD4qWB_register_Asm_16: + case ARM::VLD4qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VST3 multiple 3-element structure instructions. 
+ case ARM::VST3dAsm_8: + case ARM::VST3dAsm_16: + case ARM::VST3dAsm_32: + case ARM::VST3qAsm_8: + case ARM::VST3qAsm_16: + case ARM::VST3qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST3dWB_fixed_Asm_8: + case ARM::VST3dWB_fixed_Asm_16: + case ARM::VST3dWB_fixed_Asm_32: + case ARM::VST3qWB_fixed_Asm_8: + case ARM::VST3qWB_fixed_Asm_16: + case ARM::VST3qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST3dWB_register_Asm_8: + case ARM::VST3dWB_register_Asm_16: + case ARM::VST3dWB_register_Asm_32: + case ARM::VST3qWB_register_Asm_8: + case ARM::VST3qWB_register_Asm_16: + case ARM::VST3qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VST4 multiple 4-element structure instructions.
+ case ARM::VST4dAsm_8: + case ARM::VST4dAsm_16: + case ARM::VST4dAsm_32: + case ARM::VST4qAsm_8: + case ARM::VST4qAsm_16: + case ARM::VST4qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST4dWB_fixed_Asm_8: + case ARM::VST4dWB_fixed_Asm_16: + case ARM::VST4dWB_fixed_Asm_32: + case ARM::VST4qWB_fixed_Asm_8: + case ARM::VST4qWB_fixed_Asm_16: + case ARM::VST4qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST4dWB_register_Asm_8: + case ARM::VST4dWB_register_Asm_16: + case ARM::VST4dWB_register_Asm_32: + case ARM::VST4qWB_register_Asm_8: + case ARM::VST4qWB_register_Asm_16: + case ARM::VST4qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle the Thumb2 mode MOV complex aliases. + case ARM::t2MOVsr: + case ARM::t2MOVSsr: { + // Which instruction to expand to depends on the CCOut operand and + // whether we're in an IT block if the register operands are low + // registers. + bool isNarrow = false; + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg()) && + isARMLowRegister(Inst.getOperand(2).getReg()) && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr)) + isNarrow = true; + MCInst TmpInst; + unsigned newOpc; + switch(ARM_AM::getSORegShOp(Inst.getOperand(3).getImm())) { + default: llvm_unreachable("unexpected opcode!"); + case ARM_AM::asr: newOpc = isNarrow ? 
ARM::tASRrr : ARM::t2ASRrr; break; + case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRrr : ARM::t2LSRrr; break; + case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLrr : ARM::t2LSLrr; break; + case ARM_AM::ror: newOpc = isNarrow ? ARM::tROR : ARM::t2RORrr; break; + } + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + if (isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + if (!isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0)); + Inst = TmpInst; + return true; + } case ARM::t2MOVsi: case ARM::t2MOVSsi: { // Which instruction to expand to depends on the CCOut operand and @@ -5504,6 +6640,7 @@ processInstruction(MCInst &Inst, case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break; case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break; case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; + case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break; } unsigned Ammount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()); if (Ammount == 32) Ammount = 0; @@ -5513,7 +6650,8 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(MCOperand::CreateReg( Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); TmpInst.addOperand(Inst.getOperand(1)); // Rn - TmpInst.addOperand(MCOperand::CreateImm(Ammount)); + if (newOpc != ARM::t2RRX) + TmpInst.addOperand(MCOperand::CreateImm(Ammount)); TmpInst.addOperand(Inst.getOperand(3)); // CondCode TmpInst.addOperand(Inst.getOperand(4)); if (!isNarrow) @@ -5535,7 +6673,6 @@ processInstruction(MCInst &Inst, case ARM::LSLr: ShiftTy = ARM_AM::lsl; break; case ARM::RORr: ShiftTy = ARM_AM::ror; break; } - // A shift by zero is a plain MOVr, not a MOVsi. unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0); MCInst TmpInst; TmpInst.setOpcode(ARM::MOVsr); @@ -5881,6 +7018,57 @@ processInstruction(MCInst &Inst, } break; } + case ARM::MOVsi: { + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm()); + if (SOpc == ARM_AM::rrx) return false; + if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) { + // Shifting by zero is accepted as a vanilla 'MOVr' + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + return false; + } + case ARM::ANDrsi: + case ARM::ORRrsi: + case ARM::EORrsi: + case ARM::BICrsi: + case ARM::SUBrsi: + case ARM::ADDrsi: { + unsigned newOpc; + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(3).getImm()); + if (SOpc == ARM_AM::rrx) return false; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::ANDrsi: newOpc = ARM::ANDrr; break; + case ARM::ORRrsi: newOpc = ARM::ORRrr; break; + case ARM::EORrsi: newOpc = ARM::EORrr; break; + case ARM::BICrsi: newOpc = ARM::BICrr; break; + case ARM::SUBrsi: newOpc = ARM::SUBrr; break; + case ARM::ADDrsi: newOpc = ARM::ADDrr; break; + } + // If the shift is by zero, use the non-shifted instruction definition. 
+ if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) { + MCInst TmpInst; + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + return false; + } + case ARM::ITasm: case ARM::t2IT: { // The mask bits for all but the first condition are represented as // the low bit of the condition code value implies 't'. We currently @@ -5987,6 +7175,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, // block. forwardITPosition(); + // ITasm is an ARM mode pseudo-instruction that just sets the ITblock and + // doesn't actually encode. + if (Inst.getOpcode() == ARM::ITasm) + return false; + + Inst.setLoc(IDLoc); Out.EmitInstruction(Inst); return false; case Match_MissingFeature: @@ -6020,7 +7214,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, } llvm_unreachable("Implement any new match types added!"); - return true; } /// parseDirective parses the arm specific directives @@ -6040,6 +7233,10 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveSyntax(DirectiveID.getLoc()); else if (IDVal == ".unreq") return parseDirectiveUnreq(DirectiveID.getLoc()); + else if (IDVal == ".arch") + return parseDirectiveArch(DirectiveID.getLoc()); + else if (IDVal == ".eabi_attribute") + return parseDirectiveEabiAttr(DirectiveID.getLoc()); return true; } @@ -6100,23 +7297,32 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo(); bool isMachO = MAI.hasSubsectionsViaSymbols(); StringRef Name; + bool needFuncName = true; - // Darwin asm has function name after .thumb_func direction + // Darwin asm has (optionally) function name after .thumb_func direction // ELF doesn't if (isMachO) { const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) - return Error(L, "unexpected token in .thumb_func directive"); - Name = Tok.getIdentifier(); - Parser.Lex(); // Consume the identifier token. + if (Tok.isNot(AsmToken::EndOfStatement)) { + if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) + return Error(L, "unexpected token in .thumb_func directive"); + Name = Tok.getIdentifier(); + Parser.Lex(); // Consume the identifier token. + needFuncName = false; + } } - if (getLexer().isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return Error(L, "unexpected token in directive"); - Parser.Lex(); + + // Eat the end of statement and any blank lines that follow. + while (getLexer().is(AsmToken::EndOfStatement)) + Parser.Lex(); // FIXME: assuming function name will be the line following .thumb_func - if (!isMachO) { + // We really should be checking the next symbol definition even if there's + // stuff in between. + if (needFuncName) { Name = Parser.getTok().getIdentifier(); } @@ -6219,6 +7425,18 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { return false; } +/// parseDirectiveArch +/// ::= .arch token +bool ARMAsmParser::parseDirectiveArch(SMLoc L) { + return true; +} + +/// parseDirectiveEabiAttr +/// ::= .eabi_attribute int, int +bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { + return true; +} + extern "C" void LLVMInitializeARMAsmLexer(); /// Force static initialization. 
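All of the VLD/VST lane, all-lanes, and multiple-structure pseudo expansions in the AsmParser hunks above follow one pattern: the parser-only opcode is swapped for a real NEON opcode, the Spacing out-parameter says how far apart the list registers sit (one D register apart for the d forms, two apart for the spaced q forms), and the single Vd operand is fanned out into Vd, Vd + Spacing, Vd + Spacing * 2, and so on, with the same list repeated for the tied source operands of the loads. Below is a minimal standalone sketch of that fan-out idea only; it uses simplified stand-in types (Instr, Operand, and expandListPseudo are hypothetical names for illustration), not LLVM's MCInst/MCOperand API.

// Standalone model of the VLDn/VSTn pseudo expansion pattern: one base
// register operand fans out into numRegs registers spaced by `spacing`.
#include <cstdio>
#include <string>
#include <vector>

struct Operand { bool isReg; int value; };   // register number or immediate
struct Instr { std::string opcode; std::vector<Operand> ops; };

// Rewrite a pseudo with operands (Vd, Rn, alignment, cond) into a real
// instruction whose register list is Vd, Vd+spacing, Vd+spacing*2, ...
static Instr expandListPseudo(const Instr &in, const std::string &realOpcode,
                              int numRegs, int spacing) {
  Instr out;
  out.opcode = realOpcode;
  int base = in.ops[0].value;                // Vd
  for (int i = 0; i < numRegs; ++i)
    out.ops.push_back(Operand{true, base + i * spacing});
  out.ops.push_back(in.ops[1]);              // Rn
  out.ops.push_back(in.ops[2]);              // alignment
  out.ops.push_back(in.ops[3]);              // condition code
  return out;
}

int main() {
  // A made-up "vld3.32 {d4, d6, d8}, [r1]"-style pseudo: spacing 2 models the
  // every-other-D-register (spaced q-form) variants handled above.
  Instr pseudo{"VLD3qAsm_32", {{true, 4}, {true, 1}, {false, 0}, {false, 14}}};
  Instr real = expandListPseudo(pseudo, "VLD3q32", /*numRegs=*/3, /*spacing=*/2);
  for (const Operand &op : real.ops)
    std::printf("%s %d\n", op.isReg ? "reg" : "imm", op.value);
  return 0;
}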
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 04cdf55..9a2aab5 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_target(ARMCodeGen ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp + ARMMachineFunctionInfo.cpp ARMRegisterInfo.cpp ARMSelectionDAGInfo.cpp ARMSubtarget.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 49c64fd..4101f59 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1,4 +1,4 @@ -//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===// +//===-- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA --------------===// // // The LLVM Compiler Infrastructure // @@ -52,7 +52,7 @@ public: raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; + const EDInstInfo *getEDInfo() const; private: }; @@ -77,7 +77,7 @@ public: raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; + const EDInstInfo *getEDInfo() const; private: mutable std::vector<unsigned> ITBlock; DecodeStatus AddThumbPredicate(MCInst&) const; @@ -97,7 +97,7 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { Out = In; return false; } - return false; + llvm_unreachable("Invalid DecodeStatus!"); } @@ -126,6 +126,11 @@ static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, const void *Decoder); static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); @@ -332,11 +337,11 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge return new ThumbDisassembler(STI); } -EDInstInfo *ARMDisassembler::getEDInfo() const { +const EDInstInfo *ARMDisassembler::getEDInfo() const { return instInfoARM; } -EDInstInfo *ThumbDisassembler::getEDInfo() const { +const EDInstInfo *ThumbDisassembler::getEDInfo() const { return instInfoARM; } @@ -440,40 +445,38 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, MCInst &MI, const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); - if (!getOpInfo) - return false; - struct LLVMOpInfo1 SymbolicOp; + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); SymbolicOp.Value = Value; void *DisInfo = Dis->getDisInfoBlock(); - if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - if (isBranch) { - LLVMSymbolLookupCallback SymbolLookUp = - Dis->getLLVMSymbolLookupCallback(); - if (SymbolLookUp) { - uint64_t ReferenceType; - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - const char *ReferenceName; - const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, - &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = true; - SymbolicOp.Value = 0; - } - else { - SymbolicOp.Value = Value; - } - if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*Dis->CommentStream) 
<< "symbol stub for: " << ReferenceName; - } - else { - return false; - } - } - else { + + if (!getOpInfo || + !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { + // Clear SymbolicOp.Value from above and also all other fields. + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); + if (!SymbolLookUp) return false; + uint64_t ReferenceType; + if (isBranch) + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + else + ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + const char *ReferenceName; + const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, + &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; } + // For branches always create an MCExpr so it gets printed as hex address. + else if (isBranch) { + SymbolicOp.Value = Value; + } + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; + if (!Name && !isBranch) + return false; } MCContext *Ctx = Dis->getMCContext(); @@ -533,7 +536,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) MI.addOperand(MCOperand::CreateExpr(Expr)); else - assert(0 && "bad SymbolicOp.VariantKind"); + llvm_unreachable("bad SymbolicOp.VariantKind"); return true; } @@ -548,7 +551,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, /// a literal 'C' string if the referenced address of the literal pool's entry /// is an address into a section with 'C' string literals. static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, - const void *Decoder) { + const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); if (SymbolLookUp) { @@ -989,6 +992,48 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } +static const unsigned DPairDecoderTable[] = { + ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6, + ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12, + ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18, + ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24, + ARM::Q12, ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30, + ARM::Q15 +}; + +static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 30) + return MCDisassembler::Fail; + + unsigned Register = DPairDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static const unsigned DPairSpacedDecoderTable[] = { + ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5, + ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9, + ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13, + ARM::D12_D14, ARM::D13_D15, ARM::D14_D16, ARM::D15_D17, + ARM::D16_D18, ARM::D17_D19, ARM::D18_D20, ARM::D19_D21, + ARM::D20_D22, ARM::D21_D23, ARM::D22_D24, ARM::D23_D25, + ARM::D24_D26, ARM::D25_D27, ARM::D26_D28, ARM::D27_D29, + ARM::D28_D30, ARM::D29_D31 +}; + +static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 29) + return MCDisassembler::Fail; + + unsigned Register = 
DPairSpacedDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xF) return MCDisassembler::Fail; @@ -1910,12 +1955,14 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, if (pred == 0xF) { Inst.setOpcode(ARM::BLXi); imm |= fieldFromInstruction32(Insn, 24, 1) << 1; + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, + true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); return S; } - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true, - 4, Inst, Decoder)) + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, + true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) return MCDisassembler::Fail; @@ -1953,8 +2000,47 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); // First output register - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_fixed: + case ARM::VLD1q64wb_register: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q8wb_register: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2d8: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2d32wb_register: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD2b16: + case ARM::VLD2b32: + case ARM::VLD2b8: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b16wb_register: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2b32wb_register: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b8wb_register: + if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } // Second output register switch (Inst.getOpcode()) { @@ -2285,8 +2371,47 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // First input register - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: + case ARM::VST1q8: + case ARM::VST1q16wb_fixed: + case ARM::VST1q16wb_register: + case ARM::VST1q32wb_fixed: + case ARM::VST1q32wb_register: + case ARM::VST1q64wb_fixed: + case ARM::VST1q64wb_register: + case ARM::VST1q8wb_fixed: + case ARM::VST1q8wb_register: + case ARM::VST2d16: + case ARM::VST2d32: + case ARM::VST2d8: + case ARM::VST2d16wb_fixed: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_fixed: + case ARM::VST2d32wb_register: + case ARM::VST2d8wb_fixed: + case ARM::VST2d8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VST2b16: + case ARM::VST2b32: + case ARM::VST2b8: + case ARM::VST2b16wb_fixed: + 
case ARM::VST2b16wb_register: + case ARM::VST2b32wb_fixed: + case ARM::VST2b32wb_register: + case ARM::VST2b8wb_fixed: + case ARM::VST2b8wb_register: + if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } // Second input register switch (Inst.getOpcode()) { @@ -2652,8 +2777,16 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // Writeback } - if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VTBL2: + case ARM::VTBX2: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; @@ -3127,7 +3260,9 @@ DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); return MCDisassembler::Success; } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 662097a..bae4e78 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -35,8 +36,9 @@ static unsigned translateShiftImm(unsigned imm) { ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI) { + MCInstPrinter(MAI, MRI) { // Initialize the set of available features. setAvailableFeatures(STI.getFeatureBits()); } @@ -436,6 +438,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + if (!MO1.isReg()) { // For label symbolic references. 
+ printOperand(MI, Op, O); + return; + } + const MCOperand &MO3 = MI->getOperand(Op+2); unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm()); @@ -639,7 +647,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (getAvailableFeatures() & ARM::FeatureMClass) { switch (Op.getImm()) { - default: assert(0 && "Unexpected mask value!"); + default: llvm_unreachable("Unexpected mask value!"); case 0: O << "apsr"; return; case 1: O << "iapsr"; return; case 2: O << "eapsr"; return; @@ -662,12 +670,11 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) { O << "APSR_"; switch (Mask) { - default: assert(0); + default: llvm_unreachable("Unexpected mask value!"); case 4: O << "g"; return; case 8: O << "nzcvq"; return; case 12: O << "nzcvqg"; return; } - llvm_unreachable("Unexpected mask value!"); } if (SpecRegRBit) @@ -687,7 +694,10 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); - if (CC != ARMCC::AL) + // Handle the undefined 15 CC value here for printing so we don't abort(). + if ((unsigned)CC == 15) + O << "<und>"; + else if (CC != ARMCC::AL) O << ARMCondCodeToString(CC); } @@ -885,6 +895,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); + if (!MO1.isReg()) { // For label symbolic references. + printOperand(MI, OpNum, O); + return; + } + O << "[" << getRegisterName(MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm() / 4; @@ -990,6 +1005,16 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, } } +void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#" << 16 - MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#" << 32 - MI->getOperand(OpNum).getImm(); +} + void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; @@ -1009,6 +1034,23 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}"; } +void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); + O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; +} + +void ARMInstPrinter::printVectorListDPairSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); + O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; +} + void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with @@ -1046,6 +1088,29 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}"; } +void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + 
// Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; +} + +void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; +} + void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with @@ -1055,3 +1120,58 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; } +void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; +} + +void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}"; +} + +void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}"; +} + +void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. 
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}"; +} + +void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}"; +} diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 05db2d2..1037161 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -1,4 +1,4 @@ -//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===// +//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,7 +23,8 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI); + ARMInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; @@ -128,17 +129,36 @@ public: void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListDPairSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); }; } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 9982fa6..62473b2 100644 --- 
a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -1,4 +1,4 @@ -//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===// +//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,6 +16,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> @@ -43,7 +44,7 @@ namespace ARM_AM { static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return "asr"; case ARM_AM::lsl: return "lsl"; case ARM_AM::lsr: return "lsr"; @@ -54,7 +55,7 @@ namespace ARM_AM { static inline unsigned getShiftOpcEncoding(ShiftOpc Op) { switch (Op) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return 2; case ARM_AM::lsl: return 0; case ARM_AM::lsr: return 1; @@ -72,7 +73,7 @@ namespace ARM_AM { static inline const char *getAMSubModeStr(AMSubMode Mode) { switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); + default: llvm_unreachable("Unknown addressing sub-mode!"); case ARM_AM::ia: return "ia"; case ARM_AM::ib: return "ib"; case ARM_AM::da: return "da"; @@ -569,7 +570,7 @@ namespace ARM_AM { } EltBits = 64; } else { - assert(false && "Unsupported NEON immediate"); + llvm_unreachable("Unsupported NEON immediate"); } return Val; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index bf1f0e8..d3a3d3a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -31,8 +32,8 @@ using namespace llvm; namespace { class ARMELFObjectWriter : public MCELFObjectTargetWriter { public: - ARMELFObjectWriter(Triple::OSType OSType) - : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM, + ARMELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, /*HasRelocationAddend*/ false) {} }; @@ -63,6 +64,7 @@ public: { "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, @@ -76,6 +78,8 @@ public: { "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_bl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, @@ -100,18 +104,46 @@ public: return Infos[Kind - FirstTargetFixupKind]; } - bool MayNeedRelaxation(const MCInst 
&Inst) const; + /// processFixupValue - Target hook to process the literal value of a fixup + /// if necessary. + void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved) { + const MCSymbolRefExpr *A = Target.getSymA(); + // Some fixups to thumb function symbols need the low bit (thumb bit) + // twiddled. + if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { + if (A) { + const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + if (Asm.isThumbFunc(&Sym)) + Value |= 1; + } + } + // We must always generate a relocation for BL/BLX instructions if we have + // a symbol to reference, as the linker relies on knowing the destination + // symbol's thumb-ness to get interworking right. + if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || + (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl || + (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || + (unsigned)Fixup.getKind() == ARM::fixup_arm_bl)) + IsResolved = false; + } + + bool mayNeedRelaxation(const MCInst &Inst) const; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCInstFragment *DF, const MCAsmLayout &Layout) const; - void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; + void relaxInstruction(const MCInst &Inst, MCInst &Res) const; - bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; - void HandleAssemblerFlag(MCAssemblerFlag Flag) { + void handleAssemblerFlag(MCAssemblerFlag Flag) { switch (Flag) { default: break; case MCAF_Code16: @@ -132,11 +164,13 @@ public: static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { default: return Op; - case ARM::tBcc: return ARM::t2Bcc; + case ARM::tBcc: return ARM::t2Bcc; + case ARM::tLDRpciASM: return ARM::t2LDRpci; + case ARM::tADR: return ARM::t2ADR; } } -bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { +bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()) return true; return false; @@ -146,17 +180,29 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCInstFragment *DF, const MCAsmLayout &Layout) const { - // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the - // low bit being an implied zero. There's an implied +4 offset for the - // branch, so we adjust the other way here to determine what's - // encodable. - // - // Relax if the value is too big for a (signed) i8. - int64_t Offset = int64_t(Value) - 4; - return Offset > 254 || Offset < -256; + switch ((unsigned)Fixup.getKind()) { + case ARM::fixup_arm_thumb_bcc: { + // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the + // low bit being an implied zero. There's an implied +4 offset for the + // branch, so we adjust the other way here to determine what's + // encodable. + // + // Relax if the value is too big for a (signed) i8. + int64_t Offset = int64_t(Value) - 4; + return Offset > 254 || Offset < -256; + } + case ARM::fixup_thumb_adr_pcrel_10: + case ARM::fixup_arm_thumb_cp: { + // If the immediate is negative, greater than 1020, or not a multiple + // of four, the wide version of the instruction must be used. 
+ int64_t Offset = int64_t(Value) - 4; + return Offset > 1020 || Offset < 0 || Offset & 3; + } + } + llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!"); } -void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { +void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); // Sanity check w/ diagnostic if we get here w/ a bogus instruction. @@ -174,7 +220,7 @@ void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { Res.setOpcode(RelaxedOp); } -bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { +bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0 @@ -309,6 +355,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: + case ARM::fixup_arm_bl: + case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. return 0xffffff & ((Value - 8) >> 2); @@ -399,6 +447,17 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_thumb_bcc: // Offset by 4 and don't encode the lower bit, which is always 0. return ((Value - 4) >> 1) & 0xff; + case ARM::fixup_arm_pcrel_10_unscaled: { + Value = Value - 8; // ARM fixups offset by an additional word and don't + // need to adjust for the half-word ordering. + bool isAdd = true; + if ((int64_t)Value < 0) { + Value = -Value; + isAdd = false; + } + assert ((Value < 256) && "Out of range pc-relative fixup value!"); + return Value | (isAdd << 23); + } case ARM::fixup_arm_pcrel_10: Value = Value - 4; // ARM fixups offset by an additional word and don't // need to adjust for the half-word ordering. @@ -416,8 +475,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { assert ((Value < 256) && "Out of range pc-relative fixup value!"); Value |= isAdd << 23; - // Same addressing mode as fixup_arm_pcrel_10, - // but with 16-bit halfwords swapped. + // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords + // swapped. if (Kind == ARM::fixup_t2_pcrel_10) { uint32_t swapped = (Value & 0xFFFF0000) >> 16; swapped |= (Value & 0x0000FFFF) << 16; @@ -435,22 +494,21 @@ namespace { // ELF is an ELF of course... class ELFARMAsmBackend : public ARMAsmBackend { public: - Triple::OSType OSType; + uint8_t OSABI; ELFARMAsmBackend(const Target &T, const StringRef TT, - Triple::OSType _OSType) - : ARMAsmBackend(T, TT), OSType(_OSType) { } + uint8_t _OSABI) + : ARMAsmBackend(T, TT), OSABI(_OSABI) { } - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS, - /*IsLittleEndian*/ true); + return createARMELFObjectWriter(OS, OSABI); } }; // FIXME: Raise this to share code between Darwin and ELF. 
-void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, +void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned NumBytes = 4; // FIXME: 2 for Thumb Value = adjustFixupValue(Fixup.getKind(), Value); @@ -479,7 +537,7 @@ public: Subtype); } - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; virtual bool doesSectionRequireSymbols(const MCSection &Section) const { @@ -504,9 +562,12 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_thumb_cb: return 2; + case ARM::fixup_arm_pcrel_10_unscaled: case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_bl: + case ARM::fixup_arm_blx: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: return 3; @@ -531,7 +592,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { } } -void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, +void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); Value = adjustFixupValue(Fixup.getKind(), Value); @@ -567,5 +628,6 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) { if (TheTriple.isOSWindows()) assert(0 && "Windows not supported on ARM"); - return new ELFARMAsmBackend(T, TT, Triple(TT).getOS()); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new ELFARMAsmBackend(T, TT, OSABI); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index ec4b6ff..06eb4e5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -67,7 +67,6 @@ namespace ARMCC { inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { switch (CC) { - default: llvm_unreachable("Unknown condition code"); case ARMCC::EQ: return "eq"; case ARMCC::NE: return "ne"; case ARMCC::HS: return "hs"; @@ -84,6 +83,7 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { case ARMCC::LE: return "le"; case ARMCC::AL: return "al"; } + llvm_unreachable("Unknown condition code"); } namespace ARM_PROC { @@ -185,6 +185,23 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { case S29: case D29: return 29; case S30: case D30: return 30; case S31: case D31: return 31; + + // Composite registers use the regnum of the first register in the list. 
+ case D1_D2: return 1; + case D3_D5: return 3; + case D5_D7: return 5; + case D7_D9: return 7; + case D9_D10: return 9; + case D11_D12: return 11; + case D13_D14: return 13; + case D15_D16: return 15; + case D17_D18: return 17; + case D19_D20: return 19; + case D21_D22: return 21; + case D23_D24: return 23; + case D25_D26: return 25; + case D27_D28: return 27; + case D29_D30: return 29; } } @@ -237,7 +254,6 @@ namespace ARMII { inline static const char *AddrModeToString(AddrMode addrmode) { switch (addrmode) { - default: llvm_unreachable("Unknown memory operation"); case AddrModeNone: return "AddrModeNone"; case AddrMode1: return "AddrMode1"; case AddrMode2: return "AddrMode2"; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp new file mode 100644 index 0000000..5476a46 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -0,0 +1,281 @@ +//===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCValue.h" + +using namespace llvm; + +namespace { + class ARMELFObjectWriter : public MCELFObjectTargetWriter { + enum { DefaultEABIVersion = 0x05000000U }; + unsigned GetRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; + + + public: + ARMELFObjectWriter(uint8_t OSABI); + + virtual ~ARMELFObjectWriter(); + + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; + virtual unsigned getEFlags() const; + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const; + }; +} + +ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, + ELF::EM_ARM, + /*HasRelocationAddend*/ false) {} + +ARMELFObjectWriter::~ARMELFObjectWriter() {} + +// FIXME: get the real EABI Version from the Triple. +unsigned ARMELFObjectWriter::getEFlags() const { + return ELF::EF_ARM_EABIMASK & DefaultEABIVersion; +} + +// In ARM, _MergedGlobals and other most symbols get emitted directly. +// I.e. not as an offset to a section symbol. +// This code is an approximation of what ARM/gcc does. 
+ +STATISTIC(PCRelCount, "Total number of PIC Relocations"); +STATISTIC(NonPCRelCount, "Total number of non-PIC relocations"); + +const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const { + const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); + bool EmitThisSym = false; + + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(Symbol.getSection()); + bool InNormalSection = true; + unsigned RelocType = 0; + RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel); + + DEBUG( + const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind(); + MCSymbolRefExpr::VariantKind Kind2; + Kind2 = Target.getSymB() ? Target.getSymB()->getKind() : + MCSymbolRefExpr::VK_None; + dbgs() << "considering symbol " + << Section.getSectionName() << "/" + << Symbol.getName() << "/" + << " Rel:" << (unsigned)RelocType + << " Kind: " << (int)Kind << "/" << (int)Kind2 + << " Tmp:" + << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/" + << Symbol.isVariable() << "/" << Symbol.isTemporary() + << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n"); + + if (IsPCRel) { ++PCRelCount; + switch (RelocType) { + default: + // Most relocation types are emitted as explicit symbols + InNormalSection = + StringSwitch<bool>(Section.getSectionName()) + .Case(".data.rel.ro.local", false) + .Case(".data.rel", false) + .Case(".bss", false) + .Default(true); + EmitThisSym = true; + break; + case ELF::R_ARM_ABS32: + // But things get strange with R_ARM_ABS32 + // In this case, most things that go in .rodata show up + // as section relative relocations + InNormalSection = + StringSwitch<bool>(Section.getSectionName()) + .Case(".data.rel.ro.local", false) + .Case(".data.rel", false) + .Case(".rodata", false) + .Case(".bss", false) + .Default(true); + EmitThisSym = false; + break; + } + } else { + NonPCRelCount++; + InNormalSection = + StringSwitch<bool>(Section.getSectionName()) + .Case(".data.rel.ro.local", false) + .Case(".rodata", false) + .Case(".data.rel", false) + .Case(".bss", false) + .Default(true); + + switch (RelocType) { + default: EmitThisSym = true; break; + case ELF::R_ARM_ABS32: EmitThisSym = false; break; + } + } + + if (EmitThisSym) + return &Symbol; + if (! Symbol.isTemporary() && InNormalSection) { + return &Symbol; + } + return NULL; +} + +// Need to examine the Fixup when determining whether to +// emit the relocation as an explicit symbol or as a section relative +// offset +unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + return GetRelocTypeInner(Target, Fixup, IsPCRel); +} + +unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? 
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + + unsigned Type = 0; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("Unimplemented"); + case FK_Data_4: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_REL32; + break; + case MCSymbolRefExpr::VK_ARM_TLSGD: + llvm_unreachable("unimplemented"); + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + Type = ELF::R_ARM_TLS_IE32; + break; + } + break; + case ARM::fixup_arm_bl: + case ARM::fixup_arm_blx: + case ARM::fixup_arm_uncondbranch: + switch (Modifier) { + case MCSymbolRefExpr::VK_ARM_PLT: + Type = ELF::R_ARM_PLT32; + break; + default: + Type = ELF::R_ARM_CALL; + break; + } + break; + case ARM::fixup_arm_condbranch: + Type = ELF::R_ARM_JUMP24; + break; + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + Type = ELF::R_ARM_MOVT_PREL; + break; + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + Type = ELF::R_ARM_MOVW_PREL_NC; + break; + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + Type = ELF::R_ARM_THM_MOVT_PREL; + break; + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + Type = ELF::R_ARM_THM_MOVW_PREL_NC; + break; + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + Type = ELF::R_ARM_THM_CALL; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_ARM_GOT: + Type = ELF::R_ARM_GOT_BREL; + break; + case MCSymbolRefExpr::VK_ARM_TLSGD: + Type = ELF::R_ARM_TLS_GD32; + break; + case MCSymbolRefExpr::VK_ARM_TPOFF: + Type = ELF::R_ARM_TLS_LE32; + break; + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + Type = ELF::R_ARM_TLS_IE32; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_ABS32; + break; + case MCSymbolRefExpr::VK_ARM_GOTOFF: + Type = ELF::R_ARM_GOTOFF32; + break; + case MCSymbolRefExpr::VK_ARM_TARGET1: + Type = ELF::R_ARM_TARGET1; + break; + } + break; + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_cb: + case ARM::fixup_arm_thumb_cp: + case ARM::fixup_arm_thumb_br: + llvm_unreachable("Unimplemented"); + case ARM::fixup_arm_uncondbranch: + Type = ELF::R_ARM_CALL; + break; + case ARM::fixup_arm_condbranch: + Type = ELF::R_ARM_JUMP24; + break; + case ARM::fixup_arm_movt_hi16: + Type = ELF::R_ARM_MOVT_ABS; + break; + case ARM::fixup_arm_movw_lo16: + Type = ELF::R_ARM_MOVW_ABS_NC; + break; + case ARM::fixup_t2_movt_hi16: + Type = ELF::R_ARM_THM_MOVT_ABS; + break; + case ARM::fixup_t2_movw_lo16: + Type = ELF::R_ARM_THM_MOVW_ABS_NC; + break; + } + } + + return Type; +} + +MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 350c92d..1827986 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -1,4 +1,4 @@ -//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===// +//===-- ARMFixupKinds.h - ARM Specific Fixup Entries ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ 
-23,6 +23,9 @@ enum Fixups { // the 16-bit halfwords reordered. fixup_t2_ldst_pcrel_12, + // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol + // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded. + fixup_arm_pcrel_10_unscaled, // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses // used in VFP instructions where the lower 2 bits are not encoded // (so it's encoded as an 8-bit immediate). @@ -56,6 +59,12 @@ enum Fixups { // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. fixup_arm_thumb_br, + // fixup_arm_bl - Fixup for ARM BL instructions. + fixup_arm_bl, + + // fixup_arm_blx - Fixup for ARM BLX instructions. + fixup_arm_blx, + // fixup_arm_thumb_bl - Fixup for Thumb BL instructions. fixup_arm_thumb_bl, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 1c109e0..03e8d5f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===// +//===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===// // // The LLVM Compiler Infrastructure // @@ -48,6 +48,8 @@ static const char *const arm_asm_table[] = { 0,0 }; +void ARMMCAsmInfoDarwin::anchor() { } + ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { AsmTransCBE = arm_asm_table; Data64bitsDirective = 0; @@ -61,6 +63,8 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { ExceptionsType = ExceptionHandling::SjLj; } +void ARMELFMCAsmInfo::anchor() { } + ARMELFMCAsmInfo::ARMELFMCAsmInfo() { // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 90f7822..f0b289c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====// +//===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -18,11 +18,15 @@ namespace llvm { - struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { + class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); + public: explicit ARMMCAsmInfoDarwin(); }; - struct ARMELFMCAsmInfo : public MCAsmInfo { + class ARMELFMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit ARMELFMCAsmInfo(); }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index c38a882..4445dcd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -64,7 +64,7 @@ public: // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. - unsigned getBinaryCodeForInstr(const MCInst &MI, + uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups) const; /// getMachineOpValue - Return binary encoding of operand. If the machine @@ -118,8 +118,10 @@ public: /// branch target. 
uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + uint32_t getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups) const; /// getAdrLabelOpValue - Return encoding info for 12-bit immediate /// ADR label target. @@ -166,7 +168,7 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm(); switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); + default: llvm_unreachable("Unknown addressing sub-mode!"); case ARM_AM::da: return 0; case ARM_AM::ia: return 1; case ARM_AM::db: return 2; @@ -177,7 +179,6 @@ public: /// unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const { switch (ShOpc) { - default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::no_shift: case ARM_AM::lsl: return 0; case ARM_AM::lsr: return 1; @@ -185,7 +186,7 @@ public: case ARM_AM::ror: case ARM_AM::rrx: return 3; } - return 0; + llvm_unreachable("Invalid ShiftOpc!"); } /// getAddrMode2OpValue - Return encoding for addrmode2 operands. @@ -423,7 +424,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } llvm_unreachable("Unable to encode MCOperand!"); - return 0; } /// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand. @@ -466,7 +466,7 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected branch target type!"); const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = MCFixupKind(FixupKind); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); // All of the information is in the fixup. return 0; @@ -594,16 +594,21 @@ getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t ARMMCCodeEmitter:: +getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_bl, Fixups); + + return MO.getImm() >> 2; +} + +uint32_t ARMMCCodeEmitter:: getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand MO = MI.getOperand(OpIdx); - if (MO.isExpr()) { - if (HasConditionalBranch(MI)) - return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_condbranch, Fixups); - return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_uncondbranch, Fixups); - } + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups); return MO.getImm() >> 1; } @@ -718,12 +723,13 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); else Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else { Reg = ARM::PC; int32_t Offset = MO.getImm(); + // FIXME: Handle #-0. 
if (Offset < 0) { Offset *= -1; isAdd = false; @@ -791,8 +797,8 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); - MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else @@ -833,7 +839,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // but this is good enough for now. static bool EvaluateAsPCRel(const MCExpr *Expr) { switch (Expr->getKind()) { - default: assert(0 && "Unexpected expression type"); + default: llvm_unreachable("Unexpected expression type"); case MCExpr::SymbolRef: return false; case MCExpr::Binary: return true; } @@ -857,7 +863,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, MCFixupKind Kind; switch (ARM16Expr->getKind()) { - default: assert(0 && "Unsupported ARMFixup"); + default: llvm_unreachable("Unsupported ARMFixup"); case ARMMCExpr::VK_ARM_HI16: if (!isTargetDarwin() && EvaluateAsPCRel(E)) Kind = MCFixupKind(isThumb2() @@ -879,12 +885,11 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, : ARM::fixup_arm_movw_lo16); break; } - Fixups.push_back(MCFixup::Create(0, E, Kind)); + Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); return 0; }; llvm_unreachable("Unsupported MCExpr type in MCOperand!"); - return 0; } uint32_t ARMMCCodeEmitter:: @@ -993,6 +998,19 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); + + // If The first operand isn't a register, we have a label reference. + if (!MO.isReg()) { + unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC. + + assert(MO.isExpr() && "Unexpected machine operand type!"); + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); + + ++MCNumCPRelocations; + return (Rn << 9) | (1 << 13); + } unsigned Rn = getARMRegisterNumbering(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; @@ -1066,7 +1084,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); else Kind = MCFixupKind(ARM::fixup_arm_pcrel_10); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else { @@ -1372,11 +1390,11 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, switch (Imm.getImm()) { default: break; - case 2: - case 4: case 8: - case 16: Align = 0x00; break; - case 32: Align = 0x03; break; + case 16: + case 32: // Default '0' value for invalid alignments of 8, 16, 32 bytes. 
+ case 2: Align = 0x00; break; + case 4: Align = 0x03; break; } return RegNo | (Align << 4); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 2727ba8..22e14a2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -21,7 +21,7 @@ ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr, void ARMMCExpr::PrintImpl(raw_ostream &OS) const { switch (Kind) { - default: assert(0 && "Invalid kind!"); + default: llvm_unreachable("Invalid kind!"); case VK_ARM_HI16: OS << ":upper16:"; break; case VK_ARM_LO16: OS << ":lower16:"; break; } @@ -45,8 +45,7 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) { switch (Value->getKind()) { case MCExpr::Target: - assert(0 && "Can't handle nested target expr!"); - break; + llvm_unreachable("Can't handle nested target expr!"); case MCExpr::Constant: break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 0a2e883..a727e08 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -1,4 +1,4 @@ -//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===// +//===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index e86f48e..1606b92 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===// +//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===// // // The LLVM Compiler Infrastructure // @@ -155,7 +155,6 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, if (TheTriple.isOSWindows()) { llvm_unreachable("ARM does not support Windows COFF format"); - return NULL; } return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); @@ -164,9 +163,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createARMMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, STI); + return new ARMInstPrinter(MAI, MRI, STI); return 0; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 9b3d3bd..88472d7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -46,6 +46,10 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT); +/// createARMELFObjectWriter - Construct an ELF Mach-O object writer. +MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, + uint8_t OSABI); + /// createARMMachObjectWriter - Construct an ARM Mach-O object writer. 
MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, bool Is64Bit, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index f394b4f..faf73ac 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" @@ -81,6 +82,8 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_adr_pcrel_12: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: + case ARM::fixup_arm_bl: + case ARM::fixup_arm_blx: RelocType = unsigned(macho::RIT_ARM_Branch24Bit); // Report as 'long', even though that is not quite accurate. Log2Size = llvm::Log2_32(4); @@ -136,7 +139,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); @@ -149,7 +153,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. @@ -240,7 +245,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); @@ -252,7 +258,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. @@ -294,10 +301,13 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; unsigned RelocType = macho::RIT_Vanilla; - if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { - report_fatal_error("unknown ARM fixup kind!"); - return; - } + if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) + // If we failed to get fixup kind info, it's because there's no legal + // relocation type for the fixup kind. This happens when it's a fixup that's + // expected to always be resolvable at assembly time and not have any + // relocations needed. + Asm.getContext().FatalError(Fixup.getLoc(), + "unsupported relocation on symbol"); // If this is a difference or a defined symbol plus an offset, then we need a // scattered relocation entry. 
Differences always require scattered diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index f2cf78a..2565994 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -1,10 +1,12 @@ add_llvm_library(LLVMARMDesc ARMAsmBackend.cpp + ARMELFObjectWriter.cpp ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp ARMMCTargetDesc.cpp ARMMachObjectWriter.cpp + ARMELFObjectWriter.cpp ) add_dependencies(LLVMARMDesc ARMCommonTableGen) diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 000a37f..2899836 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -1,4 +1,4 @@ -//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=// +//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 2f6842e..4fcaecf 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -699,3 +699,19 @@ test is equality test so it's more a conditional move rather than a select: Currently this is a ARM specific dag combine. We probably should make it into a target-neutral one. + +//===---------------------------------------------------------------------===// + +Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins +are specified to be undefined at zero, so portable code must check for zero +and handle it as a special case. That is unnecessary on ARM where those +operations are implemented in a way that is well-defined for zero. For +example: + +int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; } + +should just be implemented with a CLZ instruction. Since there are other +targets, e.g., PPC, that share this behavior, it would be best to implement +this in a target-independent way: we should probably fold that (when using +"undefined at zero" semantics) to set the "defined at zero" bit and have +the code generator expand out the right code. 
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index d848177..a89a663 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -1,4 +1,4 @@ -//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====// +//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -101,7 +101,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { + if (STI.isTargetIOS()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { @@ -175,14 +175,14 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { +static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { for (unsigned i = 0; CSRegs[i]; ++i) if (Reg == CSRegs[i]) return true; return false; } -static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { +static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) @@ -214,7 +214,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { @@ -278,8 +278,11 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) - .addReg(ARM::R3, RegState::Kill)); + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) + .addReg(ARM::R3, RegState::Kill); + AddDefaultPred(MIB); + MIB->copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); } @@ -350,6 +353,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + MIB->copyImplicitOps(&*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index de33bd6..adaccdd 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===// +//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===// // // The LLVM Compiler Infrastructure // @@ -19,7 +19,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/ADT/SmallVector.h" -#include "Thumb1InstrInfo.h" +#include "llvm/MC/MCInst.h" using namespace llvm; @@ -27,6 +27,15 @@ Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. 
+void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(ARM::tMOVr); + NopInst.addOperand(MCOperand::CreateReg(ARM::R8)); + NopInst.addOperand(MCOperand::CreateReg(ARM::R8)); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); +} + unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 17ef2f7..4d97626 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===// +//===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -27,6 +27,9 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb1InstrInfo(const ARMSubtarget &STI); + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + void getNoopForMachoTarget(MCInst &NopInst) const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index e61c0a7..6b8bf0e 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===// +//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===// // // The LLVM Compiler Infrastructure // @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" @@ -570,6 +571,11 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // If this instruction affects R12, adjust our restore point. for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { const MachineOperand &MO = II->getOperand(i); + if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) { + UseMI = II; + done = true; + break; + } if (!MO.isReg() || MO.isUndef() || !MO.getReg() || TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; @@ -624,6 +630,21 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, FrameReg = BasePtr; } + // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the + // call frame setup/destroy instructions have already been eliminated. That + // means the stack pointer cannot be used to access the emergency spill slot + // when !hasReservedCallFrame(). +#ifndef NDEBUG + if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) && + "Cannot use SP to access the emergency spill slot in " + "functions without a reserved call frame"); + assert(!MF.getFrameInfo()->hasVarSizedObjects() && + "Cannot use SP to access the emergency spill slot in " + "functions with variable sized frame objects"); + } +#endif // NDEBUG + // Special handling of dbg_value instructions. if (MI.isDebugValue()) { MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); @@ -694,7 +715,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // register. 
The offset is already handled in the vreg value. MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); } else { - assert(false && "Unexpected opcode!"); + llvm_unreachable("Unexpected opcode!"); } // Add predicate back if it's needed. diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 55b4d30..def75dd 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===// +//===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===// // // The LLVM Compiler Infrastructure // @@ -76,7 +76,7 @@ static void TrackDefUses(MachineInstr *MI, for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { unsigned Reg = LocalUses[i]; Uses.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) Uses.insert(*Subreg); } @@ -84,7 +84,7 @@ static void TrackDefUses(MachineInstr *MI, for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; Defs.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) Defs.insert(*Subreg); if (Reg == ARM::CPSR) @@ -239,7 +239,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); // Finalize the bundle. - FinalizeBundle(MBB, InsertPos.getInstrIterator(), LastITMI); + MachineBasicBlock::instr_iterator LI = LastITMI; + finalizeBundle(MBB, InsertPos.getInstrIterator(), llvm::next(LI)); Modified = true; ++NumITs; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 7ec3c0e..6cb182a 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===// +//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===// // // The LLVM Compiler Infrastructure // @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -34,6 +35,13 @@ Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(ARM::tNOP); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); +} + unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { // FIXME return 0; @@ -586,7 +594,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg); if (!(NCC == CC || NCC == OCC) || NMI->modifiesRegister(SrcReg, &TRI) || - NMI->definesRegister(ARM::CPSR)) + NMI->modifiesRegister(ARM::CPSR, &TRI)) break; if (++NumInsts == 4) // Too many in a row! 
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index f2637d7..a754649 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===// +//===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -28,6 +28,9 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb2InstrInfo(const ARMSubtarget &STI); + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + void getNoopForMachoTarget(MCInst &NopInst) const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 355c3bf..6d210fe 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===// +//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index e206288..5ee5f42 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -597,7 +597,24 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); - if (Reg0 != Reg1) { + // t2MUL is "special". The tied source operand is second, not first. + if (MI->getOpcode() == ARM::t2MUL) { + unsigned Reg2 = MI->getOperand(2).getReg(); + // Early exit if the regs aren't all low regs. + if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) + || !isARMLowRegister(Reg2)) + return false; + if (Reg0 != Reg2) { + // If the other operand also isn't the same as the destination, we + // can't reduce. + if (Reg1 != Reg0) + return false; + // Try to commute the operands to make it a 2-address instruction. + MachineInstr *CommutedMI = TII->commuteInstruction(MI); + if (!CommutedMI) + return false; + } + } else if (Reg0 != Reg1) { // Try to commute the operands to make it a 2-address instruction. unsigned CommOpIdx1, CommOpIdx2; if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || @@ -880,14 +897,17 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } ProcessNext: - if (LiveCPSR && - NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle() && - BundleMI->killsRegister(ARM::CPSR)) + if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) { // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill // marker is only on the BUNDLE instruction. Process the BUNDLE // instruction as we finish with the bundled instruction to work around // the inconsistency. - LiveCPSR = false; + if (BundleMI->killsRegister(ARM::CPSR)) + LiveCPSR = false; + MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR); + if (MO && !MO->isDead()) + LiveCPSR = true; + } bool DefCPSR = false; LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); |
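The t2MUL special case in Thumb2SizeReduction exists because the 16-bit encoding ties the destination to the second source operand ("MULS Rdm, Rn, Rdm"), not the first as for most 2-address reductions. A standalone sketch of just that operand constraint follows (the function and parameter names are ours; the real code additionally requires low registers and performs the commute through TII->commuteInstruction):

// Hypothetical illustration, not part of the patch.
// Narrowing t2MUL to the 16-bit MULS form is possible only when the
// destination already equals the second source, or when the two sources
// can be commuted so that it does.
static bool canNarrowT2Mul(unsigned DstReg, unsigned Src1, unsigned Src2,
                           bool &NeedsCommute) {
  NeedsCommute = false;
  if (DstReg == Src2)      // already in the tied "MULS Rdm, Rn, Rdm" shape
    return true;
  if (DstReg == Src1) {    // commuting the sources produces that shape
    NeedsCommute = true;
    return true;
  }
  return false;            // destination matches neither source; keep t2MUL
}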