diff options
Diffstat (limited to 'lib/Target/ARM')
52 files changed, 1741 insertions, 975 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index b05fe62..9b0cb0c 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -38,9 +38,6 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; -def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true", - "Enable Advanced SIMD2 instructions", - [FeatureNEON]>; def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", @@ -76,8 +73,6 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", "true", "Use NEON for single precision FP">; -// Allow more precision in FP computation -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; // Disable 32-bit to 16-bit narrowing for experimentation. def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index ca30716..410790a 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -16,7 +16,6 @@ #include "ARMAsmPrinter.h" #include "ARM.h" #include "ARMBuildAttrs.h" -#include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMTargetMachine.h" @@ -35,7 +34,6 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectStreamer.h" @@ -44,8 +42,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -732,8 +728,9 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (Subtarget->hasNEON2()) - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4"); + if (Subtarget->hasVFP4()) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + "neon-vfpv4"); else AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); /* If emitted for NEON, omit from VFP below, since you can have both @@ -1270,7 +1267,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } // Darwin call instructions are just normal call instructions with different // clobber semantics (they clobber R9). - case ARM::BXr9_CALL: case ARM::BX_CALL: { { MCInst TmpInst; @@ -1292,7 +1288,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::tBXr9_CALL: case ARM::tBX_CALL: { { MCInst TmpInst; @@ -1315,7 +1310,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::BMOVPCRXr9_CALL: case ARM::BMOVPCRX_CALL: { { MCInst TmpInst; @@ -1343,7 +1337,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::BMOVPCBr9_CALL: case ARM::BMOVPCB_CALL: { { MCInst TmpInst; @@ -1371,7 +1364,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::t2BMOVPCBr9_CALL: case ARM::t2BMOVPCB_CALL: { { MCInst TmpInst; @@ -1984,10 +1976,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } { MCInst TmpInst; - TmpInst.setOpcode(ARM::tLDRr); + TmpInst.setOpcode(ARM::tLDRi); TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + TmpInst.addOperand(MCOperand::CreateImm(0)); // Predicate. TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::CreateReg(0)); diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 4b276c5..af3f75a 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -107,7 +107,7 @@ public: if (!Subtarget->isTargetDarwin()) return 0; return Subtarget->isThumb() ? - llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm; + ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; } MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 366e2fa..c6280f8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -13,10 +13,10 @@ #include "ARMBaseInstrInfo.h" #include "ARM.h" +#include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -680,29 +680,51 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place. - if (ARM::QQPRRegClass.contains(DestReg, SrcReg) || - ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { + // Handle register classes that require multiple instructions. + unsigned BeginIdx = 0; + unsigned SubRegs = 0; + unsigned Spacing = 1; + + // Use VORRq when possible. + if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2; + else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4; + // Fall back to VMOVD. + else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2; + else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; + else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; + + else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; + else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2; + else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; + + if (Opc) { const TargetRegisterInfo *TRI = &getRegisterInfo(); - assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum."); - unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ? - ARM::qsub_1 : ARM::qsub_3; - for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, i); - unsigned Src = TRI->getSubReg(SrcReg, i); - MachineInstrBuilder Mov = - AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq)) - .addReg(Dst, RegState::Define) - .addReg(Src, getKillRegState(KillSrc)) - .addReg(Src, getKillRegState(KillSrc))); - if (i == EndSubReg) { - Mov->addRegisterDefined(DestReg, TRI); - if (KillSrc) - Mov->addRegisterKilled(SrcReg, TRI); - } + MachineInstrBuilder Mov; + for (unsigned i = 0; i != SubRegs; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + assert(Dst && Src && "Bad sub-register"); + Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) + .addReg(Src)); + // VORR takes two source operands. + if (Opc == ARM::VORRq) + Mov.addReg(Src); } + // Add implicit super-register defs and kills to the last instruction. + Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); return; } + llvm_unreachable("Impossible reg-to-reg copy"); } @@ -757,7 +779,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Unknown reg class!"); break; case 16: - if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (ARM::DPairRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) @@ -907,7 +929,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Unknown reg class!"); break; case 16: - if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (ARM::DPairRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) .addFrameIndex(FI).addImm(16) @@ -1478,6 +1500,29 @@ int llvm::getMatchingCondBranchOpcode(int Opc) { llvm_unreachable("Unknown unconditional branch opcode!"); } +/// commuteInstruction - Handle commutable instructions. +MachineInstr * +ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { + switch (MI->getOpcode()) { + case ARM::MOVCCr: + case ARM::t2MOVCCr: { + // MOVCC can be commuted by inverting the condition. + unsigned PredReg = 0; + ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); + // MOVCC AL can't be inverted. Shouldn't happen. + if (CC == ARMCC::AL || PredReg != ARM::CPSR) + return NULL; + MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + if (!MI) + return NULL; + // After swapping the MOVCC operands, also invert the condition. + MI->getOperand(MI->findFirstPredOperandIdx()) + .setImm(ARMCC::getOppositeCondition(CC)); + return MI; + } + } + return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); +} /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the /// instruction is encoded with an 'S' bit is determined by the optional CPSR @@ -1916,6 +1961,25 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, if (!MRI->hasOneNonDBGUse(Reg)) return false; + const MCInstrDesc &DefMCID = DefMI->getDesc(); + if (DefMCID.hasOptionalDef()) { + unsigned NumOps = DefMCID.getNumOperands(); + const MachineOperand &MO = DefMI->getOperand(NumOps-1); + if (MO.getReg() == ARM::CPSR && !MO.isDead()) + // If DefMI defines CPSR and it is not dead, it's obviously not safe + // to delete DefMI. + return false; + } + + const MCInstrDesc &UseMCID = UseMI->getDesc(); + if (UseMCID.hasOptionalDef()) { + unsigned NumOps = UseMCID.getNumOperands(); + if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR) + // If the instruction sets the flag, do not attempt this optimization + // since it may change the semantics of the code. + return false; + } + unsigned UseOpc = UseMI->getOpcode(); unsigned NewUseOpc = 0; uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 314e317..2fe8507 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -139,6 +139,8 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; + MachineInstr *commuteInstruction(MachineInstr*, bool=false) const; + virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 291369f..3907f75 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -15,7 +15,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMFrameLowering.h" -#include "ARMInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 2b9c55d..0bd1c3e 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -17,7 +17,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index d33364b..b9a2512 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -9,10 +9,6 @@ // This describes the calling conventions for ARM architecture. //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. -class CCIfSubtarget<string F, CCAction A>: - CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>; - /// CCIfAlign - Match of the original alignment of the arg class CCIfAlign<string Align, CCAction A>: CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index e48d07a..bc681be 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -15,7 +15,7 @@ #define DEBUG_TYPE "jit" #include "ARM.h" #include "ARMConstantPoolValue.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" @@ -46,7 +46,7 @@ namespace { class ARMCodeEmitter : public MachineFunctionPass { ARMJITInfo *JTI; - const ARMInstrInfo *II; + const ARMBaseInstrInfo *II; const TargetData *TD; const ARMSubtarget *Subtarget; TargetMachine &TM; @@ -66,7 +66,7 @@ namespace { public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : MachineFunctionPass(ID), JTI(0), - II((const ARMInstrInfo *)tm.getInstrInfo()), + II((const ARMBaseInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} @@ -383,9 +383,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo(); - II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo(); - TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData(); + JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo(); + II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo(); + TD = MF.getTarget().getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -917,9 +917,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { emitMiscBranchInstruction(MI); break; case ARM::BX_CALL: - case ARM::BMOVPCRX_CALL: - case ARM::BXr9_CALL: - case ARM::BMOVPCRXr9_CALL: { + case ARM::BMOVPCRX_CALL: { // First emit mov lr, pc unsigned Binary = 0x01a0e00f; Binary |= II->getPredicate(&MI) << ARMII::CondShift; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 2cdfd1e..fc35c7c 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -16,12 +16,12 @@ #define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" #include "ARMMachineFunctionInfo.h" -#include "ARMInstrInfo.h" #include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" @@ -209,8 +209,9 @@ namespace { } /// getMaxDisp - Returns the maximum displacement supported by MI. /// Correct for unknown alignment. + /// Conservatively subtract 2 bytes to handle weird alignment effects. unsigned getMaxDisp() const { - return KnownAlignment ? MaxDisp : MaxDisp - 2; + return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2; } }; @@ -266,7 +267,7 @@ namespace { MachineFunction *MF; MachineConstantPool *MCP; - const ARMInstrInfo *TII; + const ARMBaseInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; bool isThumb; @@ -283,51 +284,52 @@ namespace { } private: - void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs); + void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); unsigned getCPELogAlign(const MachineInstr *CPEMI); - void JumpTableFunctionScan(); - void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs); - MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); - void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); - void AdjustBBOffsetsAfter(MachineBasicBlock *BB); - bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); - int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); - bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); - void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, + void scanFunctionJumpTables(); + void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); + void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); + void adjustBBOffsetsAfter(MachineBasicBlock *BB); + bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); + int findInRangeCPEntry(CPUser& U, unsigned UserOffset); + bool findAvailableWater(CPUser&U, unsigned UserOffset, + water_iterator &WaterIter); + void createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); - bool HandleConstantPoolUser(unsigned CPUserIndex); - void RemoveDeadCPEMI(MachineInstr *CPEMI); - bool RemoveUnusedCPEntries(); - bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, - MachineInstr *CPEMI, unsigned Disp, bool NegOk, - bool DoDump = false); - bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, + bool handleConstantPoolUser(unsigned CPUserIndex); + void removeDeadCPEMI(MachineInstr *CPEMI); + bool removeUnusedCPEntries(); + bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, unsigned Disp, bool NegOk, + bool DoDump = false); + bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water, CPUser &U, unsigned &Growth); - bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool FixUpImmediateBr(ImmBranch &Br); - bool FixUpConditionalBr(ImmBranch &Br); - bool FixUpUnconditionalBr(ImmBranch &Br); - bool UndoLRSpillRestore(); + bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); + bool fixupImmediateBr(ImmBranch &Br); + bool fixupConditionalBr(ImmBranch &Br); + bool fixupUnconditionalBr(ImmBranch &Br); + bool undoLRSpillRestore(); bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const; - bool OptimizeThumb2Instructions(); - bool OptimizeThumb2Branches(); - bool ReorderThumb2JumpTables(); - bool OptimizeThumb2JumpTables(); - MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB, + bool optimizeThumb2Instructions(); + bool optimizeThumb2Branches(); + bool reorderThumb2JumpTables(); + bool optimizeThumb2JumpTables(); + MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); - void ComputeBlockSize(MachineBasicBlock *MBB); - unsigned GetOffsetOf(MachineInstr *MI) const; - unsigned GetUserOffset(CPUser&) const; + void computeBlockSize(MachineBasicBlock *MBB); + unsigned getOffsetOf(MachineInstr *MI) const; + unsigned getUserOffset(CPUser&) const; void dumpBBs(); void verify(); - bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, unsigned Disp, bool NegativeOK, bool IsSoImm = false); - bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, const CPUser &U) { - return OffsetIsInRange(UserOffset, TrialOffset, + return isOffsetInRange(UserOffset, TrialOffset, U.getMaxDisp(), U.NegOk, U.IsSoImm); } }; @@ -345,11 +347,21 @@ void ARMConstantIslands::verify() { assert(BBInfo[MBBId].Offset % (1u << Align) == 0); assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } + DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; - unsigned UserOffset = GetUserOffset(U); - assert(CPEIsInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp(), U.NegOk) && - "Constant pool entry out of range!"); + unsigned UserOffset = getUserOffset(U); + // Verify offset using the real max displacement without the safety + // adjustment. + if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk, + /* DoDump = */ true)) { + DEBUG(dbgs() << "OK\n"); + continue; + } + DEBUG(dbgs() << "Out of range.\n"); + dumpBBs(); + DEBUG(MF->dump()); + llvm_unreachable("Constant pool entry out of range!"); } #endif } @@ -382,7 +394,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlignment() << " bytes *****\n"); - TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo(); + TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo(); AFI = MF->getInfo<ARMFunctionInfo>(); STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); @@ -392,6 +404,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { HasFarJump = false; + // This pass invalidates liveness information when it splits basic blocks. + MF->getRegInfo().invalidateLiveness(); + // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. MF->RenumberBlocks(); @@ -400,8 +415,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // of the TB[BH] instructions. bool MadeChange = false; if (isThumb2 && AdjustJumpTableBlocks) { - JumpTableFunctionScan(); - MadeChange |= ReorderThumb2JumpTables(); + scanFunctionJumpTables(); + MadeChange |= reorderThumb2JumpTables(); // Data is out of date, so clear it. It'll be re-computed later. T2JumpTables.clear(); // Blocks may have shifted around. Keep the numbering up to date. @@ -419,7 +434,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; if (!MCP->isEmpty()) - DoInitialPlacement(CPEMIs); + doInitialPlacement(CPEMIs); /// The next UID to take is the first unused one. AFI->initPICLabelUId(CPEMIs.size()); @@ -427,13 +442,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Do the initial scan of the function, building up information about the // sizes of each block, the location of all the water, and finding all of the // constant pool users. - InitialFunctionScan(CPEMIs); + initializeFunctionInfo(CPEMIs); CPEMIs.clear(); DEBUG(dumpBBs()); /// Remove dead constant pool entries. - MadeChange |= RemoveUnusedCPEntries(); + MadeChange |= removeUnusedCPEntries(); // Iteratively place constant pool entries and fix up branches until there // is no change. @@ -442,7 +457,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); bool CPChange = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - CPChange |= HandleConstantPoolUser(i); + CPChange |= handleConstantPoolUser(i); if (CPChange && ++NoCPIters > 30) report_fatal_error("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); @@ -454,7 +469,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n'); bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - BRChange |= FixUpImmediateBr(ImmBranches[i]); + BRChange |= fixupImmediateBr(ImmBranches[i]); if (BRChange && ++NoBRIters > 30) report_fatal_error("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); @@ -466,7 +481,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Shrink 32-bit Thumb2 branch, load, and store instructions. if (isThumb2 && !STI->prefers32BitThumb()) - MadeChange |= OptimizeThumb2Instructions(); + MadeChange |= optimizeThumb2Instructions(); // After a while, this might be made debug-only, but it is not expensive. verify(); @@ -474,7 +489,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // If LR has been forced spilled and no far jump (i.e. BL) has been issued, // undo the spill / restore of LR if possible. if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) - MadeChange |= UndoLRSpillRestore(); + MadeChange |= undoLRSpillRestore(); // Save the mapping between original and cloned constpool entries. for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { @@ -497,10 +512,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { return MadeChange; } -/// DoInitialPlacement - Perform the initial placement of the constant pool +/// doInitialPlacement - Perform the initial placement of the constant pool /// entries. To start with, we put them all at the end of the function. void -ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { +ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // Create the basic block to hold the CPE's. MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); MF->push_back(BB); @@ -610,10 +625,10 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { return Log2_32(Align); } -/// JumpTableFunctionScan - Do a scan of the function, building up +/// scanFunctionJumpTables - Do a scan of the function, building up /// information about the sizes of each block and the locations of all /// the jump tables. -void ARMConstantIslands::JumpTableFunctionScan() { +void ARMConstantIslands::scanFunctionJumpTables() { for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; @@ -625,11 +640,11 @@ void ARMConstantIslands::JumpTableFunctionScan() { } } -/// InitialFunctionScan - Do the initial scan of the function, building up +/// initializeFunctionInfo - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. void ARMConstantIslands:: -InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { +initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { BBInfo.clear(); BBInfo.resize(MF->getNumBlockIDs()); @@ -638,14 +653,14 @@ InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { // alignment assumptions, as we don't know for sure the size of any // instructions in the inline assembly. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - ComputeBlockSize(I); + computeBlockSize(I); // The known bits of the entry block offset are determined by the function // alignment. BBInfo.front().KnownBits = MF->getAlignment(); // Compute block offsets and known bits. - AdjustBBOffsetsAfter(MF->begin()); + adjustBBOffsetsAfter(MF->begin()); // Now go back through the instructions and build up our data structures. for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); @@ -790,9 +805,9 @@ InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { } } -/// ComputeBlockSize - Compute the size and some alignment information for MBB. +/// computeBlockSize - Compute the size and some alignment information for MBB. /// This function updates BBInfo directly. -void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) { +void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; BBI.Size = 0; BBI.Unalign = 0; @@ -817,10 +832,10 @@ void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) { } } -/// GetOffsetOf - Return the current offset of the specified machine instruction +/// getOffsetOf - Return the current offset of the specified machine instruction /// from the start of the function. This offset changes as stuff is moved /// around inside the function. -unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { +unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); // The offset is composed of two things: the sum of the sizes of all MBB's @@ -843,10 +858,10 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS, return LHS->getNumber() < RHS->getNumber(); } -/// UpdateForInsertedWaterBlock - When a block is newly inserted into the +/// updateForInsertedWaterBlock - When a block is newly inserted into the /// machine function, it upsets all of the block numbers. Renumber the blocks /// and update the arrays that parallel this numbering. -void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { +void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { // Renumber the MBB's to keep them consecutive. NewBB->getParent()->RenumberBlocks(NewBB); @@ -866,7 +881,7 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { /// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. -MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { +MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); // Create a new MBB for the code after the OrigBB. @@ -897,7 +912,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { OrigBB->addSuccessor(NewBB); // Update internal data structures to account for the newly inserted MBB. - // This is almost the same as UpdateForInsertedWaterBlock, except that + // This is almost the same as updateForInsertedWaterBlock, except that // the Water goes after OrigBB, not NewBB. MF->RenumberBlocks(NewBB); @@ -924,23 +939,23 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // the new jump we added. (It should be possible to do this without // recounting everything, but it's very confusing, and this is rarely // executed.) - ComputeBlockSize(OrigBB); + computeBlockSize(OrigBB); // Figure out how large the NewMBB is. As the second half of the original // block, it may contain a tablejump. - ComputeBlockSize(NewBB); + computeBlockSize(NewBB); // All BBOffsets following these blocks must be modified. - AdjustBBOffsetsAfter(OrigBB); + adjustBBOffsetsAfter(OrigBB); return NewBB; } -/// GetUserOffset - Compute the offset of U.MI as seen by the hardware +/// getUserOffset - Compute the offset of U.MI as seen by the hardware /// displacement computation. Update U.KnownAlignment to match its current /// basic block location. -unsigned ARMConstantIslands::GetUserOffset(CPUser &U) const { - unsigned UserOffset = GetOffsetOf(U.MI); +unsigned ARMConstantIslands::getUserOffset(CPUser &U) const { + unsigned UserOffset = getOffsetOf(U.MI); const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()]; unsigned KnownBits = BBI.internalKnownBits(); @@ -960,13 +975,13 @@ unsigned ARMConstantIslands::GetUserOffset(CPUser &U) const { return UserOffset; } -/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool +/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool /// reference) is within MaxDisp of TrialOffset (a proposed location of a /// constant pool entry). -/// UserOffset is computed by GetUserOffset above to include PC adjustments. If +/// UserOffset is computed by getUserOffset above to include PC adjustments. If /// the mod 4 alignment of UserOffset is not known, the uncertainty must be /// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that. -bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, +bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK, bool IsSoImm) { if (UserOffset <= TrialOffset) { @@ -982,11 +997,11 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, return false; } -/// WaterIsInRange - Returns true if a CPE placed after the specified +/// isWaterInRange - Returns true if a CPE placed after the specified /// Water (a basic block) will be in range for the specific MI. /// /// Compute how much the function will grow by inserting a CPE after Water. -bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, +bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, MachineBasicBlock* Water, CPUser &U, unsigned &Growth) { unsigned CPELogAlign = getCPELogAlign(U.CPEMI); @@ -1013,7 +1028,7 @@ bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise - // the offset of the instruction. Also account for unknown alignment padding + // the offset of the instruction. Also account for unknown alignment padding // in blocks between CPE and the user. if (CPEOffset < UserOffset) UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); @@ -1021,15 +1036,15 @@ bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, // CPE fits in existing padding. Growth = 0; - return OffsetIsInRange(UserOffset, CPEOffset, U); + return isOffsetInRange(UserOffset, CPEOffset, U); } -/// CPEIsInRange - Returns true if the distance between specific MI and +/// isCPEntryInRange - Returns true if the distance between specific MI and /// specific ConstPool entry instruction can fit in MI's displacement field. -bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, +bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned MaxDisp, bool NegOk, bool DoDump) { - unsigned CPEOffset = GetOffsetOf(CPEMI); + unsigned CPEOffset = getOffsetOf(CPEMI); assert(CPEOffset % 4 == 0 && "Misaligned CPE"); if (DoDump) { @@ -1046,7 +1061,7 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, }); } - return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk); + return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk); } #ifndef NDEBUG @@ -1066,7 +1081,7 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { } #endif // NDEBUG -void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { +void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) { unsigned BBNum = BB->getNumber(); for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { // Get the offset and known bits at the end of the layout predecessor. @@ -1088,17 +1103,18 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { } } -/// DecrementOldEntry - find the constant pool entry with index CPI +/// decrementCPEReferenceCount - find the constant pool entry with index CPI /// and instruction CPEMI, and decrement its refcount. If the refcount /// becomes 0 remove the entry and instruction. Returns true if we removed /// the entry, false if we didn't. -bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { +bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, + MachineInstr *CPEMI) { // Find the old entry. Eliminate it if it is no longer used. CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); assert(CPE && "Unexpected!"); if (--CPE->RefCount == 0) { - RemoveDeadCPEMI(CPEMI); + removeDeadCPEMI(CPEMI); CPE->CPEMI = NULL; --NumCPEs; return true; @@ -1112,13 +1128,14 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { /// 0 = no existing entry found /// 1 = entry found, and there were no code insertions or deletions /// 2 = entry found, and there were code insertions or deletions -int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) +int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) { MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; // Check to see if the CPE is already in-range. - if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, true)) { + if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, + true)) { DEBUG(dbgs() << "In range\n"); return 1; } @@ -1133,7 +1150,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Removing CPEs can leave empty entries, skip if (CPEs[i].CPEMI == NULL) continue; - if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), + if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), U.NegOk)) { DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); @@ -1149,7 +1166,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) CPEs[i].RefCount++; // ...and the original. If we didn't remove the old entry, none of the // addresses changed, so we don't need another pass. - return DecrementOldEntry(CPI, CPEMI) ? 2 : 1; + return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1; } } return 0; @@ -1170,7 +1187,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { return ((1<<23)-1)*4; } -/// LookForWater - Look for an existing entry in the WaterList in which +/// findAvailableWater - Look for an existing entry in the WaterList in which /// we can place the CPE referenced from U so it's within range of U's MI. /// Returns true if found, false if not. If it returns true, WaterIter /// is set to the WaterList entry. For Thumb, prefer water that will not @@ -1178,7 +1195,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { /// terminates, the CPE location for a particular CPUser is only allowed to /// move to a lower address, so search backward from the end of the list and /// prefer the first water that is in range. -bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, +bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, water_iterator &WaterIter) { if (WaterList.empty()) return false; @@ -1196,7 +1213,7 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, // sure to take advantage of it for all the CPEs near that block, so that // we don't insert more branches than necessary. unsigned Growth; - if (WaterIsInRange(UserOffset, WaterBB, U, Growth) && + if (isWaterInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || NewWaterList.count(WaterBB)) && Growth < BestGrowth) { // This is the least amount of required padding seen so far. @@ -1215,14 +1232,14 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, return BestGrowth != ~0u; } -/// CreateNewWater - No existing WaterList entry will work for +/// createNewWater - No existing WaterList entry will work for /// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the /// block is used if in range, and the conditional branch munged so control /// flow is correct. Otherwise the block is split to create a hole with an /// unconditional branch around it. In either case NewMBB is set to a /// block following which the new island can be inserted (the WaterList /// is not adjusted). -void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, +void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB) { CPUser &U = CPUsers[CPUserIndex]; @@ -1245,7 +1262,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign, UserBBI.postKnownBits()); - if (OffsetIsInRange(UserOffset, CPEOffset, U)) { + if (isOffsetInRange(UserOffset, CPEOffset, U)) { DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() << format(", expected CPE offset %#x\n", CPEOffset)); NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); @@ -1264,7 +1281,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); BBInfo[UserMBB->getNumber()].Size += Delta; - AdjustBBOffsetsAfter(UserMBB); + adjustBBOffsetsAfter(UserMBB); return; } } @@ -1298,7 +1315,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // The 4 in the following is for the unconditional branch we'll be inserting // (allows for long branch on Thumb1). Alignment of the island is handled - // inside OffsetIsInRange. + // inside isOffsetInRange. BaseInsertOffset -= 4; DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) @@ -1327,7 +1344,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, MI = llvm::next(MI)) { if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; - if (!OffsetIsInRange(Offset, EndInsertOffset, U)) { + if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift intertion point by one unit of alignment so it is within reach. BaseInsertOffset -= 1u << LogAlign; EndInsertOffset -= 1u << LogAlign; @@ -1352,29 +1369,29 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // Avoid splitting an IT block. if (LastIT) { unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); if (CC != ARMCC::AL) MI = LastIT; } - NewMBB = SplitBlockBeforeInstr(MI); + NewMBB = splitBlockBeforeInstr(MI); } -/// HandleConstantPoolUser - Analyze the specified user, checking to see if it +/// handleConstantPoolUser - Analyze the specified user, checking to see if it /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise. -bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { +bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; unsigned CPI = CPEMI->getOperand(1).getIndex(); unsigned Size = CPEMI->getOperand(2).getImm(); // Compute this only once, it's expensive. - unsigned UserOffset = GetUserOffset(U); + unsigned UserOffset = getUserOffset(U); // See if the current entry is within range, or there is a clone of it // in range. - int result = LookForExistingCPEntry(U, UserOffset); + int result = findInRangeCPEntry(U, UserOffset); if (result==1) return false; else if (result==2) return true; @@ -1386,7 +1403,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); MachineBasicBlock *NewMBB; water_iterator IP; - if (LookForWater(U, UserOffset, IP)) { + if (findAvailableWater(U, UserOffset, IP)) { DEBUG(dbgs() << "Found water in range\n"); MachineBasicBlock *WaterBB = *IP; @@ -1403,9 +1420,9 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { } else { // No water found. DEBUG(dbgs() << "No water found\n"); - CreateNewWater(CPUserIndex, UserOffset, NewMBB); + createNewWater(CPUserIndex, UserOffset, NewMBB); - // SplitBlockBeforeInstr adds to WaterList, which is important when it is + // splitBlockBeforeInstr adds to WaterList, which is important when it is // called while handling branches so that the water will be seen on the // next iteration for constant pools, but in this context, we don't want // it. Check for this so it will be removed from the WaterList. @@ -1430,10 +1447,10 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { MF->insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. - UpdateForInsertedWaterBlock(NewIsland); + updateForInsertedWaterBlock(NewIsland); // Decrement the old entry, and remove it if refcount becomes 0. - DecrementOldEntry(CPI, CPEMI); + decrementCPEReferenceCount(CPI, CPEMI); // Now that we have an island to add the CPE to, clone the original CPE and // add it to the island. @@ -1448,7 +1465,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { // Increase the size of the island block to account for the new entry. BBInfo[NewIsland->getNumber()].Size += Size; - AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); + adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1463,9 +1480,9 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { return true; } -/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update +/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update /// sizes and offsets of impacted basic blocks. -void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { +void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { MachineBasicBlock *CPEBB = CPEMI->getParent(); unsigned Size = CPEMI->getOperand(2).getImm(); CPEMI->eraseFromParent(); @@ -1480,7 +1497,7 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { // Entries are sorted by descending alignment, so realign from the front. CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); - AdjustBBOffsetsAfter(CPEBB); + adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if // this BB's predecessor jumps directly to this BB's successor. This // shouldn't happen currently. @@ -1488,15 +1505,15 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { // FIXME: remove the empty blocks after all the work is done? } -/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts +/// removeUnusedCPEntries - Remove constant pool entries whose refcounts /// are zero. -bool ARMConstantIslands::RemoveUnusedCPEntries() { +bool ARMConstantIslands::removeUnusedCPEntries() { unsigned MadeChange = false; for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { std::vector<CPEntry> &CPEs = CPEntries[i]; for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { - RemoveDeadCPEMI(CPEs[j].CPEMI); + removeDeadCPEMI(CPEs[j].CPEMI); CPEs[j].CPEMI = NULL; MadeChange = true; } @@ -1505,18 +1522,18 @@ bool ARMConstantIslands::RemoveUnusedCPEntries() { return MadeChange; } -/// BBIsInRange - Returns true if the distance between specific MI and +/// isBBInRange - Returns true if the distance between specific MI and /// specific BB can fit in MI's displacement field. -bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, +bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned MaxDisp) { unsigned PCAdj = isThumb ? 4 : 8; - unsigned BrOffset = GetOffsetOf(MI) + PCAdj; + unsigned BrOffset = getOffsetOf(MI) + PCAdj; unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() << " from BB#" << MI->getParent()->getNumber() << " max delta=" << MaxDisp - << " from " << GetOffsetOf(MI) << " to " << DestOffset + << " from " << getOffsetOf(MI) << " to " << DestOffset << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); if (BrOffset <= DestOffset) { @@ -1530,37 +1547,37 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, return false; } -/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far +/// fixupImmediateBr - Fix up an immediate branch whose destination is too far /// away to fit in its displacement field. -bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) { +bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); // Check to see if the DestBB is already in-range. - if (BBIsInRange(MI, DestBB, Br.MaxDisp)) + if (isBBInRange(MI, DestBB, Br.MaxDisp)) return false; if (!Br.isCond) - return FixUpUnconditionalBr(Br); - return FixUpConditionalBr(Br); + return fixupUnconditionalBr(Br); + return fixupConditionalBr(Br); } -/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is +/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is /// too far away to fit in its displacement field. If the LR register has been /// spilled in the epilogue, then we can use BL to implement a far jump. /// Otherwise, add an intermediate branch instruction to a branch. bool -ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) { +ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); if (!isThumb1) - llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!"); + llvm_unreachable("fixupUnconditionalBr is Thumb1 only!"); // Use BL to implement far jump. Br.MaxDisp = (1 << 21) * 2; MI->setDesc(TII->get(ARM::tBfar)); BBInfo[MBB->getNumber()].Size += 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); HasFarJump = true; ++NumUBrFixed; @@ -1569,11 +1586,11 @@ ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) { return true; } -/// FixUpConditionalBr - Fix up a conditional branch whose destination is too +/// fixupConditionalBr - Fix up a conditional branch whose destination is too /// far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. bool -ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { +ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1607,7 +1624,7 @@ ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { // bne L2 // b L1 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { + if (isBBInRange(MI, NewDest, Br.MaxDisp)) { DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " << *BMI); BMI->getOperand(0).setMBB(DestBB); @@ -1619,7 +1636,7 @@ ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { } if (NeedSplit) { - SplitBlockBeforeInstr(MI); + splitBlockBeforeInstr(MI); // No need for the branch to the next block. We're adding an unconditional // branch to the destination. int delta = TII->GetInstSizeInBytes(&MBB->back()); @@ -1651,14 +1668,14 @@ ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { // Remove the old conditional branch. It may or may not still be in MBB. BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); MI->eraseFromParent(); - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); return true; } -/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills +/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills /// LR / restores LR to pc. FIXME: This is done here because it's only possible /// to do this if tBfar is not used. -bool ARMConstantIslands::UndoLRSpillRestore() { +bool ARMConstantIslands::undoLRSpillRestore() { bool MadeChange = false; for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { MachineInstr *MI = PushPopMIs[i]; @@ -1677,26 +1694,26 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } -// mayOptimizeThumb2Instruction - Returns true if OptimizeThumb2Instructions +// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions // below may shrink MI. bool ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const { switch(MI->getOpcode()) { - // OptimizeThumb2Instructions. + // optimizeThumb2Instructions. case ARM::t2LEApcrel: case ARM::t2LDRpci: - // OptimizeThumb2Branches. + // optimizeThumb2Branches. case ARM::t2B: case ARM::t2Bcc: case ARM::tBcc: - // OptimizeThumb2JumpTables. + // optimizeThumb2JumpTables. case ARM::t2BR_JT: return true; } return false; } -bool ARMConstantIslands::OptimizeThumb2Instructions() { +bool ARMConstantIslands::optimizeThumb2Instructions() { bool MadeChange = false; // Shrink ADR and LDR from constantpool. @@ -1727,7 +1744,7 @@ bool ARMConstantIslands::OptimizeThumb2Instructions() { if (!NewOpc) continue; - unsigned UserOffset = GetUserOffset(U); + unsigned UserOffset = getUserOffset(U); unsigned MaxOffs = ((1 << Bits) - 1) * Scale; // Be conservative with inline asm. @@ -1735,22 +1752,23 @@ bool ARMConstantIslands::OptimizeThumb2Instructions() { MaxOffs -= 2; // FIXME: Check if offset is multiple of scale if scale is not 4. - if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + DEBUG(dbgs() << "Shrink: " << *U.MI); U.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = U.MI->getParent(); BBInfo[MBB->getNumber()].Size -= 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumT2CPShrunk; MadeChange = true; } } - MadeChange |= OptimizeThumb2Branches(); - MadeChange |= OptimizeThumb2JumpTables(); + MadeChange |= optimizeThumb2Branches(); + MadeChange |= optimizeThumb2JumpTables(); return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Branches() { +bool ARMConstantIslands::optimizeThumb2Branches() { bool MadeChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { @@ -1776,11 +1794,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { if (NewOpc) { unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); - if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { + if (isBBInRange(Br.MI, DestBB, MaxOffs)) { + DEBUG(dbgs() << "Shrink branch: " << *Br.MI); Br.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = Br.MI->getParent(); BBInfo[MBB->getNumber()].Size -= 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; MadeChange = true; } @@ -1797,7 +1816,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { NewOpc = 0; unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg); if (Pred == ARMCC::EQ) NewOpc = ARM::tCBZ; else if (Pred == ARMCC::NE) @@ -1807,7 +1826,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. - unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; + unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2; unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { MachineBasicBlock::iterator CmpMI = Br.MI; @@ -1815,11 +1834,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { --CmpMI; if (CmpMI->getOpcode() == ARM::tCMPi8) { unsigned Reg = CmpMI->getOperand(0).getReg(); - Pred = llvm::getInstrPredicate(CmpMI, PredReg); + Pred = getInstrPredicate(CmpMI, PredReg); if (Pred == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 && isARMLowRegister(Reg)) { MachineBasicBlock *MBB = Br.MI->getParent(); + DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); MachineInstr *NewBR = BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags()); @@ -1827,7 +1847,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { Br.MI->eraseFromParent(); Br.MI = NewBR; BBInfo[MBB->getNumber()].Size -= 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; } @@ -1839,9 +1859,9 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { return MadeChange; } -/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller +/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. -bool ARMConstantIslands::OptimizeThumb2JumpTables() { +bool ARMConstantIslands::optimizeThumb2JumpTables() { bool MadeChange = false; // FIXME: After the tables are shrunk, can we get rid some of the @@ -1861,7 +1881,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { bool ByteOk = true; bool HalfWordOk = true; - unsigned JTOffset = GetOffsetOf(MI) + 4; + unsigned JTOffset = getOffsetOf(MI) + 4; const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { MachineBasicBlock *MBB = JTBBs[j]; @@ -1936,11 +1956,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { if (!OptOk) continue; + DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI + << " lea: " << *LeaMI); unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) .addReg(IdxReg, getKillRegState(IdxRegKill)) .addJumpTableIndex(JTI, JTOP.getTargetFlags()) .addImm(MI->getOperand(JTOpIdx+1).getImm()); + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); // FIXME: Insert an "ALIGN" instruction to ensure the next instruction // is 2-byte aligned. For now, asm printer will fix it up. unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); @@ -1954,7 +1977,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { int delta = OrigSize - NewSize; BBInfo[MBB->getNumber()].Size -= delta; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumTBs; MadeChange = true; @@ -1964,9 +1987,9 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { return MadeChange; } -/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that +/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that /// jump tables always branch forwards, since that's what tbb and tbh need. -bool ARMConstantIslands::ReorderThumb2JumpTables() { +bool ARMConstantIslands::reorderThumb2JumpTables() { bool MadeChange = false; MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); @@ -1995,7 +2018,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables() { // The destination precedes the switch. Try to move the block forward // so we have a positive offset. MachineBasicBlock *NewBB = - AdjustJTTargetBlockForward(MBB, MI->getParent()); + adjustJTTargetBlockForward(MBB, MI->getParent()); if (NewBB) MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB); MadeChange = true; @@ -2007,8 +2030,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables() { } MachineBasicBlock *ARMConstantIslands:: -AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) -{ +adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h index 1c4e532..6a84f8a 100644 --- a/lib/Target/ARM/ARMELFWriterInfo.h +++ b/lib/Target/ARM/ARMELFWriterInfo.h @@ -17,6 +17,7 @@ #include "llvm/Target/TargetELFWriterInfo.h" namespace llvm { + class TargetMachine; class ARMELFWriterInfo : public TargetELFWriterInfo { public: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c2b7816..5fc0360 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -19,7 +19,6 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -613,7 +612,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; @@ -794,15 +793,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, "base pointer without frame pointer?"); if (AFI->isThumb2Function()) { - llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, *TII); + emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, *TII); } else if (AFI->isThumbFunction()) { - llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, *TII, RI); + emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, *TII, RI); } else { - llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, - *TII); + emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, + *TII); } // If there's dynamic realignment, adjust for it. if (RI.needsStackRealignment(MF)) { diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a24eab4..2e1eaca 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -16,7 +16,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" -#include "ARMRegisterInfo.h" #include "ARMTargetMachine.h" #include "ARMSubtarget.h" #include "ARMConstantPoolValue.h" @@ -2112,13 +2111,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) { } unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - - // iOS needs the r9 versions of the opcodes. - bool isiOS = Subtarget->isTargetIOS(); if (isThumb2) { - return isiOS ? ARM::tBLr9 : ARM::tBL; + return ARM::tBL; } else { - return isiOS ? ARM::BLr9 : ARM::BL; + return ARM::BL; } } @@ -2177,8 +2173,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for iOS, BL otherwise. - // TODO: Turn this into the table of arm call ops. + // Issue the call. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); if (isThumb2) @@ -2303,8 +2298,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for iOS, BL otherwise. - // TODO: Turn this into the table of arm call ops. + // Issue the call. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); // Explicitly adding the predicate here. @@ -2350,7 +2344,8 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { return Len <= 16; } -bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len) { +bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, + uint64_t Len) { // Make sure we don't bloat code by inlining very large memcpy's. if (!ARMIsMemCpySmall(Len)) return false; @@ -2639,7 +2634,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, } namespace llvm { - llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { + FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index bd4b2a9..402ecb0 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -422,17 +422,16 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || - RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) { // Tail call return: adjust the stack pointer and jump to callee. MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); // Jump to label or value in register. - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { - unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) - ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd) - : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND); + if (RetOpcode == ARM::TCRETURNdi) { + unsigned TCOpcode = STI.isThumb() ? + (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : + ARM::TAILJMPd; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), @@ -449,10 +448,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); - } else if (RetOpcode == ARM::TCRETURNriND) { - BuildMI(MBB, MBBI, dl, - TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)). - addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); @@ -648,9 +643,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); bool isTailCall = (RetOpcode == ARM::TCRETURNdi || - RetOpcode == ARM::TCRETURNdiND || - RetOpcode == ARM::TCRETURNri || - RetOpcode == ARM::TCRETURNriND); + RetOpcode == ARM::TCRETURNri); SmallVector<unsigned, 4> Regs; unsigned i = CSI.size(); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ffb9acb..1eafbbc 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2825,7 +2825,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v8i8: Opc = ARM::VZIPd8; break; case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VZIPd32; break; + // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. + case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VZIPq8; break; case MVT::v8i16: Opc = ARM::VZIPq16; break; case MVT::v4f32: @@ -2844,7 +2845,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v8i8: Opc = ARM::VUZPd8; break; case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VUZPd32; break; + // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. + case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VUZPq8; break; case MVT::v8i16: Opc = ARM::VUZPq16; break; case MVT::v4f32: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e26dd22..a103c94 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -19,7 +19,6 @@ #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" -#include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" @@ -508,7 +507,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); - + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); @@ -770,8 +769,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); + if (!Subtarget->hasVFP4()) { + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + } // Various VFP goodness if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { @@ -1642,7 +1643,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, /// and then confiscate the rest of the parameter registers to insure /// this. void -llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { +ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { unsigned reg = State->AllocateReg(GPRArgRegs, 4); assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && @@ -1672,7 +1673,7 @@ llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, - const ARMInstrInfo *TII) { + const TargetInstrInfo *TII) { unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { @@ -1807,8 +1808,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const ARMInstrInfo *TII = - ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -1936,63 +1936,72 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return result; } -bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { +bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; - unsigned NumCopies = 0; - SDNode* Copies[2] = { 0, 0 }; - SDNode *Use = *N->use_begin(); - if (Use->getOpcode() == ISD::CopyToReg) { - Copies[NumCopies++] = Use; - } else if (Use->getOpcode() == ARMISD::VMOVRRD) { + SDValue TCChain = Chain; + SDNode *Copy = *N->use_begin(); + if (Copy->getOpcode() == ISD::CopyToReg) { + // If the copy has a glue operand, we conservatively assume it isn't safe to + // perform a tail call. + if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + return false; + TCChain = Copy->getOperand(0); + } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { + SDNode *VMov = Copy; // f64 returned in a pair of GPRs. - for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); + SmallPtrSet<SDNode*, 2> Copies; + for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ISD::CopyToReg) return false; - Copies[UI.getUse().getResNo()] = *UI; - ++NumCopies; + Copies.insert(*UI); } - } else if (Use->getOpcode() == ISD::BITCAST) { + if (Copies.size() > 2) + return false; + + for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); + UI != UE; ++UI) { + SDValue UseChain = UI->getOperand(0); + if (Copies.count(UseChain.getNode())) + // Second CopyToReg + Copy = *UI; + else + // First CopyToReg + TCChain = UseChain; + } + } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. - if (!Use->hasNUsesOfValue(1, 0)) + if (!Copy->hasOneUse()) return false; - Use = *Use->use_begin(); - if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) + Copy = *Copy->use_begin(); + if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; - Copies[NumCopies++] = Use; + Chain = Copy->getOperand(0); } else { return false; } - if (NumCopies != 1 && NumCopies != 2) - return false; - bool HasRet = false; - for (unsigned i = 0; i < NumCopies; ++i) { - SDNode *Copy = Copies[i]; - for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); - UI != UE; ++UI) { - if (UI->getOpcode() == ISD::CopyToReg) { - SDNode *Use = *UI; - if (Use == Copies[0] || ((NumCopies == 2) && (Use == Copies[1]))) - continue; - return false; - } - if (UI->getOpcode() != ARMISD::RET_FLAG) - return false; - HasRet = true; - } + for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() != ARMISD::RET_FLAG) + return false; + HasRet = true; } - return HasRet; + if (!HasRet) + return false; + + Chain = TCChain; + return true; } bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!EnableARMTailCalls) + if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) return false; if (!CI->isTailCall()) @@ -3674,27 +3683,6 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) const { - if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) - return SDValue(); - - ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); - assert(Op.getValueType() == MVT::f32 && - "ConstantFP custom lowering should only occur for f32."); - - APFloat FPVal = CFP->getValueAPF(); - int ImmVal = ARM_AM::getFP32Imm(FPVal); - if (ImmVal == -1) - return SDValue(); - - DebugLoc DL = Op.getDebugLoc(); - SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); - SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, - DAG.getConstant(0, MVT::i32)); -} - /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. @@ -3831,6 +3819,58 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, return DAG.getTargetConstant(EncodedVal, MVT::i32); } +SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const { + if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + return SDValue(); + + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + assert(Op.getValueType() == MVT::f32 && + "ConstantFP custom lowering should only occur for f32."); + + // Try splatting with a VMOV.f32... + APFloat FPVal = CFP->getValueAPF(); + int ImmVal = ARM_AM::getFP32Imm(FPVal); + if (ImmVal != -1) { + DebugLoc DL = Op.getDebugLoc(); + SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, + NewVal); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, + DAG.getConstant(0, MVT::i32)); + } + + // If that fails, try a VMOV.i32 + EVT VMovVT; + unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); + SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, + VMOVModImm); + if (NewVal != SDValue()) { + DebugLoc DL = Op.getDebugLoc(); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, + NewVal); + SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, + VecConstant); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, + DAG.getConstant(0, MVT::i32)); + } + + // Finally, try a VMVN.i32 + NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, + VMVNModImm); + if (NewVal != SDValue()) { + DebugLoc DL = Op.getDebugLoc(); + SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); + SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, + VecConstant); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, + DAG.getConstant(0, MVT::i32)); + } + + return SDValue(); +} + + static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); @@ -5795,7 +5835,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; unsigned MaxCSNum = 0; MachineModuleInfo &MMI = MF->getMMI(); - for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { + for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; + ++BB) { if (!BB->isLandingPad()) continue; // FIXME: We should assert that the EH_LABEL is the first MI in the landing @@ -5871,7 +5912,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); else if (!Subtarget->hasVFP2()) BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); - else + else BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); unsigned NumLPads = LPadList.size(); @@ -7308,15 +7349,99 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { /// ISD::STORE. static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - // Bitcast an i64 store extracted from a vector to f64. - // Otherwise, the i64 value will be legalized to a pair of i32 values. StoreSDNode *St = cast<StoreSDNode>(N); + if (St->isVolatile()) + return SDValue(); + + // Optimize trunc store (of multiple scalars) to shuffle and store. First, + // pack all of the elements in one place. Next, store to memory in fewer + // chunks. SDValue StVal = St->getValue(); - if (!ISD::isNormalStore(St) || St->isVolatile()) + EVT VT = StVal.getValueType(); + if (St->isTruncatingStore() && VT.isVector()) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT StVT = St->getMemoryVT(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); + + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); + + unsigned SizeRatio = FromEltSz / ToEltSz; + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle. + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems*SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + DebugLoc DL = St->getDebugLoc(); + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, + DAG.getUNDEF(WideVec.getValueType()), + ShuffleVec.data()); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. + + // Find the largest store unit + MVT StoreType = MVT::i8; + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { + MVT Tp = (MVT::SimpleValueType)tp; + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) + StoreType = Tp; + } + // Didn't find a legal store type. + if (!TLI.isTypeLegal(StoreType)) + return SDValue(); + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); + SmallVector<SDValue, 8> Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue BasePtr = St->getBasePtr(); + + // Perform one or more big stores into memory. + unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); + for (unsigned I = 0; I < E; I++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + StoreType, ShuffWide, + DAG.getIntPtrConstant(I)); + SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + Increment); + Chains.push_back(Ch); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], + Chains.size()); + } + + if (!ISD::isNormalStore(St)) return SDValue(); + // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and + // ARM stores of arguments in the same cache line. if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && - StVal.getNode()->hasOneUse() && !St->isVolatile()) { + StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; DebugLoc DL = St->getDebugLoc(); SDValue BasePtr = St->getBasePtr(); @@ -7337,6 +7462,8 @@ static SDValue PerformSTORECombine(SDNode *N, StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); + // Bitcast an i64 store extracted from a vector to f64. + // Otherwise, the i64 value will be legalized to a pair of i32 values. SelectionDAG &DAG = DCI.DAG; DebugLoc dl = StVal.getDebugLoc(); SDValue IntVec = StVal.getOperand(0); @@ -8259,8 +8386,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -8586,10 +8712,12 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, /// a register against the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // Thumb2 and ARM modes can use cmn for negative immediates. if (!Subtarget->isThumb()) - return ARM_AM::getSOImmVal(Imm) != -1; + return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; if (Subtarget->isThumb2()) - return ARM_AM::getT2SOImmVal(Imm) != -1; + return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; + // Thumb1 doesn't have cmn, and only 8-bit immediates. return Imm >= 0 && Imm <= 255; } @@ -8776,22 +8904,20 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, } void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), Mask, - KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index a71b74e..352d980 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -315,7 +315,6 @@ namespace llvm { SelectionDAG &DAG) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -494,7 +493,7 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 1d38bcf..f04926a 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -532,6 +532,7 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> let Inst{11-4} = 0b00001001; let Inst{3-0} = Rt2; + let Unpredictable{11-8} = 0b1111; let DecoderMethod = "DecodeSwap"; } diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 72af535..5d3e059 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -18,7 +18,6 @@ #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 8196582..1eb561d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -181,12 +181,8 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4">; -def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON">; -def HasNEON2 : Predicate<"Subtarget->hasNEON2()">, - AssemblerPredicate<"FeatureNEON2">; -def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -221,6 +217,14 @@ def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. +// But only select them if more precision in FP computation is allowed. +// Do not use them for Darwin platforms. +def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && " + "!Subtarget->isTargetDarwin()">; +def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " + "Subtarget->isTargetDarwin()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -251,7 +255,8 @@ def imm16_31 : ImmLeaf<i32, [{ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1; + int64_t Value = -(int)N->getZExtValue(); + return Value && ARM_AM::getSOImmVal(Value) != -1; }], so_imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; } @@ -736,7 +741,7 @@ def postidx_reg : Operand<i32> { let DecoderMethod = "DecodePostIdxReg"; let PrintMethod = "printPostIdxRegOperand"; let ParserMatchClass = PostIdxRegAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } @@ -903,6 +908,11 @@ def p_imm : Operand<i32> { let DecoderMethod = "DecodeCoprocessor"; } +def pf_imm : Operand<i32> { + let PrintMethod = "printPImmediate"; + let ParserMatchClass = CoprocNumAsmOperand; +} + def CoprocRegAsmOperand : AsmOperandClass { let Name = "CoprocReg"; let ParserMethod = "parseCoprocRegOperand"; @@ -1182,6 +1192,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{19-16} = Rn; let Inst{15-12} = 0b0000; let Inst{11-0} = imm; + + let Unpredictable{15-12} = 0b1111; } def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rn, $Rm", @@ -1195,6 +1207,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{15-12} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; + + let Unpredictable{15-12} = 0b1111; } def rsi : AI1<opcod, (outs), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, @@ -1209,11 +1223,13 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{11-5} = shift{11-5}; let Inst{4} = 0; let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; } def rsr : AI1<opcod, (outs), - (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, + (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rn, $shift", - [(opnode GPR:$Rn, so_reg_reg:$shift)]> { + [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; @@ -1225,6 +1241,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{6-5} = shift{6-5}; let Inst{4} = 1; let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; } } @@ -1330,10 +1348,10 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{4} = 0; let Inst{3-0} = shift{3-0}; } - def rsr : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift), + def rsr : AsI1<opcod, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>, + [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>, Requires<[IsARM]> { bits<4> Rd; bits<4> Rn; @@ -1367,7 +1385,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, cc_out:$s)>, Requires<[IsARM]>; def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, Requires<[IsARM]>; @@ -1907,7 +1925,7 @@ let isCall = 1, def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM, IsNotIOS]> { + Requires<[IsARM]> { let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; @@ -1917,7 +1935,7 @@ let isCall = 1, def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM, IsNotIOS]> { + Requires<[IsARM]> { bits<24> func; let Inst{23-0} = func; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -1927,7 +1945,7 @@ let isCall = 1, def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotIOS]> { + Requires<[IsARM, HasV5T]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; @@ -1936,7 +1954,7 @@ let isCall = 1, def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotIOS]> { + Requires<[IsARM, HasV5T]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; @@ -1946,67 +1964,19 @@ let isCall = 1, // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsNotIOS]>; + Requires<[IsARM, HasV4T]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsNotIOS]>; + Requires<[IsARM, NoV4T]>; // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM, IsNotIOS]>; -} - -let isCall = 1, - // On IOS R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. - Defs = [LR], Uses = [R7, SP] in { - def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops), - 4, IIC_Br, - [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>, - Requires<[IsARM, IsIOS]>; - - def BLr9_pred : ARMPseudoExpand<(outs), - (ins bl_target:$func, pred:$p, variable_ops), - 4, IIC_Br, - [(ARMcall_pred tglobaladdr:$func)], - (BL_pred bl_target:$func, pred:$p)>, - Requires<[IsARM, IsIOS]>; - - // ARMv5T and above - def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops), - 4, IIC_Br, - [(ARMcall GPR:$func)], - (BLX GPR:$func)>, - Requires<[IsARM, HasV5T, IsIOS]>; - - def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops), - 4, IIC_Br, - [(ARMcall_pred GPR:$func)], - (BLX_pred GPR:$func, pred:$p)>, - Requires<[IsARM, HasV5T, IsIOS]>; - - // ARMv4T - // Note: Restrict $func to the tGPR regclass to prevent it being in LR. - def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsIOS]>; - - // ARMv4 - def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsIOS]>; - - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. - def BMOVPCBr9_CALL : ARMPseudoInst<(outs),(ins bl_target:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM, IsIOS]>; + Requires<[IsARM]>; } let isBranch = 1, isTerminator = 1 in { @@ -2073,45 +2043,22 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // IOS versions. - let Uses = [SP] in { - def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsIOS]>; - - def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsIOS]>; - - def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops), - 4, IIC_Br, [], - (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsIOS]>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { + def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), + IIC_Br, []>; - def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (BX GPR:$dst)>, - Requires<[IsARM, IsIOS]>; + def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + IIC_Br, []>; - } - - // Non-IOS versions (the difference is R9). - let Uses = [SP] in { - def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotIOS]>; - - def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotIOS]>; + def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops), + 4, IIC_Br, [], + (Bcc br_target:$dst, (ops 14, zero_reg))>, + Requires<[IsARM]>; - def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops), - 4, IIC_Br, [], - (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsNotIOS]>; - - def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (BX GPR:$dst)>, - Requires<[IsARM, IsNotIOS]>; - } + def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), + 4, IIC_Br, [], + (BX GPR:$dst)>, + Requires<[IsARM]>; } // Secure Monitor Call is a system instruction. @@ -2484,7 +2431,7 @@ multiclass AI3ldrT<bits<4> op, string opc> { let Inst{3-0} = offset{3-0}; let AsmMatchConverter = "cvtLdExtTWriteBackImm"; } - def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb), + def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb), (ins addr_offset_none:$addr, postidx_reg:$Rm), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc, "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> { @@ -2492,8 +2439,10 @@ multiclass AI3ldrT<bits<4> op, string opc> { let Inst{23} = Rm{4}; let Inst{22} = 0; let Inst{11-8} = 0; + let Unpredictable{11-8} = 0b1111; let Inst{3-0} = Rm{3-0}; let AsmMatchConverter = "cvtLdExtTWriteBackReg"; + let DecoderMethod = "DecodeLDR"; } } @@ -3241,6 +3190,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc, let Inst{19-16} = Rn; let Inst{15-12} = Rd; let Inst{3-0} = Rm; + + let Unpredictable{11-8} = 0b1111; } // Saturating add/subtract @@ -3533,19 +3484,20 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, // property. Remove them when it's possible to add those properties // on an individual MachineInstr, not just an instuction description. let isCommutable = 1 in { -def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), +def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, Requires<[IsARM, HasV6]> { let Inst{15-12} = 0b0000; + let Unpredictable{15-12} = 0b1111; } let Constraints = "@earlyclobber $Rd" in -def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, +def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s), 4, IIC_iMUL32, - [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))], - (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], + (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; } @@ -4040,10 +3992,13 @@ def BCCZi64 : PseudoInst<(outs), // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { + +let isCommutable = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_reg_imm:$shift, pred:$p), 4, IIC_iCMOVsr, @@ -4164,7 +4119,7 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{3-0} = opt; } -// Pseudo isntruction that combines movs + predicated rsbmi +// Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS let usesCustomInserter = 1, Defs = [CPSR] in { def ABS : ARMPseudoInst< @@ -4325,9 +4280,9 @@ def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, // SWP/SWPB are deprecated in V6/V7. let mayLoad = 1, mayStore = 1 in { -def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), +def SWP : AIswp<0, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>; -def SWPB: AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), +def SWPB: AIswp<1, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>; } @@ -4356,7 +4311,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{23-20} = opc1; } -def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, +def CDP2 : ABXI<0b1110, (outs), (ins pf_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, @@ -4635,7 +4590,7 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, class MovRRCopro<string opc, bit direction, list<dag> pattern = []> : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, - GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -4654,13 +4609,13 @@ class MovRRCopro<string opc, bit direction, list<dag> pattern = []> } def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, - [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, + [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, imm:$CRm)]>; def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, - GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary, + GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { let Inst{31-28} = 0b1111; let Inst{23-21} = 0b010; @@ -4677,10 +4632,12 @@ class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> let Inst{11-8} = cop; let Inst{7-4} = opc1; let Inst{3-0} = CRm; + + let DecoderMethod = "DecodeMRRC2"; } def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, - [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, + [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, imm:$CRm)]>; def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; @@ -4689,22 +4646,32 @@ def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; // // Move to ARM core register from Special Register -def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, +def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []> { bits<4> Rd; let Inst{23-16} = 0b00001111; + let Unpredictable{19-17} = 0b111; + let Inst{15-12} = Rd; - let Inst{7-4} = 0b0000; + + let Inst{11-0} = 0b000000000000; + let Unpredictable{11-0} = 0b110100001111; } -def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPR:$Rd, pred:$p)>, Requires<[IsARM]>; +def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>, Requires<[IsARM]>; -def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, +// The MRSsys instruction is the MRS instruction from the ARM ARM, +// section B9.3.9, with the R bit set to 1. +def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []> { bits<4> Rd; let Inst{23-16} = 0b01001111; + let Unpredictable{19-16} = 0b1111; + let Inst{15-12} = Rd; - let Inst{7-4} = 0b0000; + + let Inst{11-0} = 0b000000000000; + let Unpredictable{11-0} = 0b110100001111; } // Move from ARM core register to Special Register @@ -4868,36 +4835,15 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), // TODO: add,sub,and, 3-instr forms? -// Tail calls -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNri tcGPR:$dst)>, Requires<[IsIOS]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; - -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotIOS]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; +// Tail calls. These patterns also apply to Thumb mode. +def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>; +def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>; +def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>; // Direct calls -def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, - Requires<[IsARM, IsNotIOS]>; -def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, - Requires<[IsARM, IsIOS]>; -def : ARMPat<(ARMcall_nolink texternalsym:$func), - (BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsARM, IsNotIOS]>; +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; def : ARMPat<(ARMcall_nolink texternalsym:$func), - (BMOVPCBr9_CALL texternalsym:$func)>, - Requires<[IsARM, IsIOS]>; + (BMOVPCB_CALL texternalsym:$func)>; // zextload i1 -> zextload i8 def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f61eb2b..fd8ac0b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -530,16 +530,16 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ // Use VLDM to load a Q register as a D register pair. // This is a pseudo instruction that is expanded to VLDMD after reg alloc. def VLDMQIA - : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), + : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", - [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; + [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. // This is a pseudo instruction that is expanded to VSTMD after reg alloc. def VSTMQIA - : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), + : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), IIC_fpStore_m, "", - [(store (v2f64 QPR:$src), GPR:$Rn)]>; + [(store (v2f64 DPair:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -1938,20 +1938,11 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin> // VST1LN : Vector Store (single element from one lane) class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane), + (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVST1LN"; -} -class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane), - IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } @@ -1962,16 +1953,17 @@ class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> } def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } -def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> { +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, + addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -1987,14 +1979,14 @@ def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), // ...with address register writeback: class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, + (ins AdrMode:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - addrmode6:$Rn, am6offset:$Rm))]> { + AdrMode:$Rn, am6offset:$Rm))]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> @@ -2004,16 +1996,16 @@ class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> } def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, - extractelt> { + extractelt, addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -3642,7 +3634,7 @@ multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, } multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { + string baseOpc, SDNode OpNode> { // 64-bit vector types. def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { @@ -3676,6 +3668,33 @@ multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; // imm6 = xxxxxx + + // Aliases for two-operand forms (source and dest regs the same). + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8")) + DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16")) + DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v2i32")) + DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v1i64")) + DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8")) + QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16")) + QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32")) + QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v2i64")) + QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; } // Neon Shift-Accumulate vector operations, @@ -3986,10 +4005,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -4044,10 +4063,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -4096,23 +4115,36 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; - // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; // Fused Vector Multiply Subtract (floating-point) def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)), + (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)), + (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)), + (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)), + (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; // Vector Subtract Operations. @@ -4614,8 +4646,10 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>; -defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>; +defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", + NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", + NEONvshru>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -4649,8 +4683,10 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>; -defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>; +defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", + NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", + NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -4795,12 +4831,12 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd, DPR:$Vd1), (ins DPR:$Vm, DPR:$Vm1), - NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd, QPR:$Vd1), (ins QPR:$Vm, QPR:$Vm1), - NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", []>; // Vector Move Operations. @@ -5342,7 +5378,9 @@ def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; -def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">; +// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; @@ -5352,7 +5390,9 @@ def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; -def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">; +// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; @@ -5462,13 +5502,13 @@ def : N3VSPat<fadd, VADDfd>; def : N3VSPat<fsub, VSUBfd>; def : N3VSPat<fmul, VMULfd>; def : N3VSMulOpPat<fmul, fadd, VMLAfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat<fmul, fsub, VMLSfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat<fmul, fadd, VFMAfd>, - Requires<[HasNEON2, UseNEONForFP,FPContractions]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N3VSMulOpPat<fmul, fsub, VFMSfd>, - Requires<[HasNEON2, UseNEONForFP,FPContractions]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N2VSPat<fabs, VABSfd>; def : N2VSPat<fneg, VNEGfd>; def : N3VSPat<NEONfmax, VMAXfd>; @@ -5594,6 +5634,7 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length. +// // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> = // Pat<(v4i32 (extloadvi8 addrmode5:$addr)) // (EXTRACT_SUBREG (VMOVLuv4i32 @@ -5604,28 +5645,63 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, // qsub_0)>; multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, string Insn1Lanes, string Insn1Ty, string Insn2Lanes, - string Insn2Ty, SubRegIndex RegType> { + string Insn2Ty> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0))>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0))>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0))>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening load, to quadruple the initial length, but which ends up only +// requiring half the available lanes (a 64-bit outcome instead of a 128-bit). +// +// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), +// (VLDRS addrmode5:$addr), +// ssub_0)), +// dsub_0)), +// dsub_0)>; +multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty> { def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), dsub_0)), - RegType)>; + dsub_0)>; def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), dsub_0)), - RegType)>; + dsub_0)>; def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), dsub_0)), - RegType)>; + dsub_0)>; } defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16 @@ -5636,12 +5712,12 @@ defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 -// Double lengthening - v4i8 -> v4i16 -> v4i32 -defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>; +// Double lengthening - v4i8 -> v4i16 -> v4i32 +defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; // v2i8 -> v2i16 -> v2i32 -defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>; +defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; // v2i16 -> v2i32 -> v2i64 -defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>; +defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)), @@ -5911,7 +5987,7 @@ def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -// VSHL (immediate) two-operand aliases. +// VSHR (immediate) two-operand aliases. def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", @@ -5948,6 +6024,41 @@ def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; +// VRSHL two-operand aliases. +def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", + (VRSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", + (VRSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", + (VRSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", + (VRSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", + (VRSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", + (VRSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", + (VRSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", + (VRSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", + (VRSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", + (VRSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", + (VRSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", + (VRSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", + (VRSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", + (VRSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", + (VRSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", + (VRSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", @@ -6911,6 +7022,100 @@ def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; +// Two-operand variants for VHSUB. + // Signed. +def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm", + (VHSUBsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm", + (VHSUBsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm", + (VHSUBsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm", + (VHSUBsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm", + (VHSUBsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm", + (VHSUBsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + + // Unsigned. +def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm", + (VHSUBuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm", + (VHSUBuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm", + (VHSUBuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm", + (VHSUBuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm", + (VHSUBuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm", + (VHSUBuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + + +// Two-operand variants for VHADD. + // Signed. +def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm", + (VHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm", + (VHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm", + (VHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm", + (VHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm", + (VHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm", + (VHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + + // Unsigned. +def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm", + (VHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm", + (VHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm", + (VHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm", + (VHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm", + (VHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm", + (VHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VRHADD. + // Signed. +def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm", + (VRHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm", + (VRHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm", + (VRHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; + +def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm", + (VRHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm", + (VRHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm", + (VRHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; + + // Unsigned. +def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm", + (VRHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm", + (VRHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm", + (VRHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; + +def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm", + (VRHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm", + (VRHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm", + (VRHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; + // VSWP allows, but does not require, a type suffix. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index ba1791b..6335229 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -91,6 +91,12 @@ def t_imm0_508s4 : Operand<i32> { let ParserMatchClass = t_imm0_508s4_asmoperand; let OperandType = "OPERAND_IMMEDIATE"; } +// Alias use only, so no printer is necessary. +def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; } +def t_imm0_508s4_neg : Operand<i32> { + let ParserMatchClass = t_imm0_508s4_neg_asmoperand; + let OperandType = "OPERAND_IMMEDIATE"; +} // Define Thumb specific addressing modes. @@ -345,6 +351,11 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), let DecoderMethod = "DecodeThumbAddSPImm"; } +def : tInstAlias<"add${p} sp, $imm", + (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; +def : tInstAlias<"add${p} sp, sp, $imm", + (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; + // Can optionally specify SP as a three operand instruction. def : tInstAlias<"add${p} sp, sp, $imm", (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>; @@ -405,14 +416,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // prevent stack-pointer assignments that appear immediately before calls from // potentially appearing dead. let isCall = 1, - // On non-IOS platforms R9 is callee-saved. Defs = [LR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotIOS]> { + Requires<[IsThumb]> { bits<22> func; let Inst{26} = func{21}; let Inst{25-16} = func{20-11}; @@ -426,7 +436,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsNotIOS]> { + Requires<[IsThumb, HasV5T]> { bits<21> func; let Inst{25-16} = func{20-11}; let Inst{13} = 1; @@ -439,7 +449,7 @@ let isCall = 1, def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsNotIOS]>, + Requires<[IsThumb, HasV5T]>, T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; @@ -450,37 +460,7 @@ let isCall = 1, def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsNotIOS]>; -} - -let isCall = 1, - // On IOS R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. - Defs = [LR], Uses = [R7, SP] in { - // Also used for Thumb2 - def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), - 4, IIC_Br, [(ARMtcall tglobaladdr:$func)], - (tBL pred:$p, t_bltarget:$func)>, - Requires<[IsThumb, IsIOS]>; - - // ARMv5T and above, also used for Thumb2 - def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), - 4, IIC_Br, [(ARMcall tglobaladdr:$func)], - (tBLXi pred:$p, t_blxtarget:$func)>, - Requires<[IsThumb, HasV5T, IsIOS]>; - - // Also used for Thumb2 - def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops), - 2, IIC_Br, [(ARMtcall GPR:$func)], - (tBLXr pred:$p, GPR:$func)>, - Requires<[IsThumb, HasV5T, IsIOS]>; - - // ARMv4T - def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 4, IIC_Br, - [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsIOS]>; + Requires<[IsThumb, IsThumb1Only]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { @@ -524,24 +504,20 @@ let isBranch = 1, isTerminator = 1 in let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. let Uses = [SP] in { - // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls - // on IOS), so it's in ARMInstrThumb2.td. def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; } - // Non-IOS versions (the difference is R9). + // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls + // on IOS), so it's in ARMInstrThumb2.td. + // Non-IOS version: let Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, Requires<[IsThumb, IsNotIOS]>; - def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsNotIOS]>; } } @@ -1307,20 +1283,14 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Direct calls def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, - Requires<[IsThumb, IsNotIOS]>; -def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsNotIOS]>; -def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsIOS]>; + Requires<[IsThumb, HasV5T]>; // Indirect calls to ARM routines def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsNotIOS]>; -def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsIOS]>; + Requires<[IsThumb, HasV5T]>; // zextload i1 -> zextload i8 def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), @@ -1437,3 +1407,11 @@ def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; def : tInstAlias<"neg${s}${p} $Rd, $Rm", (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; + +// Implied destination operand forms for shifts. +def : tInstAlias<"lsl${s}${p} $Rdm, $imm", + (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>; +def : tInstAlias<"lsr${s}${p} $Rdm, $imm", + (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; +def : tInstAlias<"asr${s}${p} $Rdm, $imm", + (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 1f7edc1..e6fb9d5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -89,20 +89,26 @@ def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; + int64_t Value = -(int)N->getZExtValue(); + return Value && ARM_AM::getT2SOImmVal(Value) != -1; }], t2_so_imm_neg_XFORM> { let ParserMatchClass = t2_so_imm_neg_asmoperand; } /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. -def imm0_4095 : Operand<i32>, - ImmLeaf<i32, [{ +def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; } +def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 4096; -}]>; +}]> { + let ParserMatchClass = imm0_4095_asmoperand; +} -def imm0_4095_neg : PatLeaf<(i32 imm), [{ +def imm0_4095_neg_asmoperand: AsmOperandClass { let Name = "Imm0_4095Neg"; } +def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 4096; -}], imm_neg_XFORM>; +}], imm_neg_XFORM> { + let ParserMatchClass = imm0_4095_neg_asmoperand; +} def imm0_255_neg : PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 255; @@ -2871,6 +2877,8 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { + +let isCommutable = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, pred:$p), 4, IIC_iCMOVr, @@ -3189,6 +3197,7 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{13} = target{17}; let Inst{21-16} = target{16-11}; let Inst{10-0} = target{10-0}; + let DecoderMethod = "DecodeT2BInstruction"; } let isNotDuplicable = 1, isIndirectBranch = 1 in { @@ -3268,37 +3277,19 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { Requires<[IsThumb2, IsIOS]>; } -let isCall = 1, - // On non-IOS platforms R9 is callee-saved. - Defs = [LR], Uses = [SP] in { +let isCall = 1, Defs = [LR], Uses = [SP] in { // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def t2BMOVPCB_CALL : tPseudoInst<(outs), (ins t_bltarget:$func, variable_ops), 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotIOS]>; -} - -let isCall = 1, - // On IOS R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. - Defs = [LR], Uses = [R7, SP] in { - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. - def t2BMOVPCBr9_CALL : tPseudoInst<(outs), - (ins t_bltarget:$func, variable_ops), - 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; } // Direct calls def : T2Pat<(ARMcall_nolink texternalsym:$func), (t2BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsThumb, IsNotIOS]>; -def : T2Pat<(ARMcall_nolink texternalsym:$func), - (t2BMOVPCBr9_CALL texternalsym:$func)>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; // IT block let Defs = [ITSTATE] in @@ -3966,6 +3957,19 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; +// add w/ negative immediates is just a sub. +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rd, $Rn, $imm", + (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p} $Rdn, $imm", + (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rdn, $imm", + (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + + // Aliases for SUB without the ".w" optional width specifier. def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; @@ -3981,13 +3985,14 @@ def : t2InstAlias<"sub${s}${p} $Rdn, $imm", (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${p} $Rdn, $imm", (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm", + (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $Rm", (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm", (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; - // Alias for compares without the ".w" optional width specifier. def : t2InstAlias<"cmn${p} $Rn, $Rm", (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e9d5720..3600b88 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -950,7 +950,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -958,7 +958,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -966,10 +966,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; def VMLSD : ADbI<0b11100, 0b00, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -977,7 +977,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -985,7 +985,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -993,10 +993,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLAD : ADbI<0b11100, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1004,7 +1004,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1012,7 +1012,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1020,10 +1020,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1031,14 +1031,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1046,10 +1046,10 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; //===----------------------------------------------------------------------===// // Fused FP Multiply-Accumulate Operations. @@ -1060,7 +1060,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1068,17 +1068,25 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)), + (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)), + (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFMSD : ADbI<0b11101, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1086,7 +1094,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1094,17 +1102,33 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fma (fneg x), y, z) -> (vfms x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fneg (fma x, (fneg y), z) -> (vfms x, y, z) +def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFNMAD : ADbI<0b11101, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1112,7 +1136,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1120,17 +1144,33 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fneg (fma x, y, z)) -> (vfnma x, y, z) +def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFNMSD : ADbI<0b11101, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1138,24 +1178,40 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z) +def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma x, (fneg y), z) -> (vnfms x, y, z) +def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; //===----------------------------------------------------------------------===// // FP Conditional moves. diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 753e578..c5db211 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -13,7 +13,7 @@ #define DEBUG_TYPE "jit" #include "ARMJITInfo.h" -#include "ARMInstrInfo.h" +#include "ARM.h" #include "ARMConstantPoolValue.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 0f6dc04..9ef2ace 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -15,8 +15,8 @@ #define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -93,7 +93,9 @@ namespace { bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, - DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); + DebugLoc dl, + ArrayRef<std::pair<unsigned, bool> > Regs, + ArrayRef<unsigned> ImpDefs); void MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &MemOps, unsigned memOpsBegin, @@ -282,7 +284,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector<std::pair<unsigned, bool>, 8> &Regs) { + ArrayRef<std::pair<unsigned, bool> > Regs, + ArrayRef<unsigned> ImpDefs) { // Only a single register to load / store. Don't bother. unsigned NumRegs = Regs.size(); if (NumRegs <= 1) @@ -350,6 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); + // Add implicit defs for super-registers. + for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i) + MIB.addReg(ImpDefs[i], RegState::ImplicitDefine); + return true; } @@ -384,19 +391,29 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, } SmallVector<std::pair<unsigned, bool>, 8> Regs; + SmallVector<unsigned, 8> ImpDefs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { unsigned Reg = memOps[i].Reg; // If we are inserting the merged operation after an operation that // uses the same register, make sure to transfer any kill flag. bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); + + // Collect any implicit defs of super-registers. They must be preserved. + for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) + continue; + unsigned DefReg = MO->getReg(); + if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) + ImpDefs.push_back(DefReg); + } } // Try to do the merge. MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; ++Loc; if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, - Pred, PredReg, Scratch, dl, Regs)) + Pred, PredReg, Scratch, dl, Regs, ImpDefs)) return; // Merge succeeded, update records. @@ -537,7 +554,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && + getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -570,7 +587,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && + getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -701,7 +718,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, bool BaseKill = MI->getOperand(0).isKill(); unsigned Bytes = getLSMultipleTransferSize(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); @@ -854,7 +871,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool DoMerge = false; ARM_AM::AddrOpc AddSub = ARM_AM::add; unsigned NewOpc = 0; @@ -1112,7 +1129,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); int OffImm = getMemoryOpOffset(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); if (OddRegNum > EvenRegNum && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a @@ -1143,6 +1160,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned NewOpc = (isLd) ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); + // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, + // so adjust and use t2LDRi12 here for that. + unsigned NewOpc2 = (isLd) + ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); DebugLoc dl = MBBI->getDebugLoc(); // If this is a load and base register is killed, it may have been // re-defed by the load, make sure the first load does not clobber it. @@ -1150,11 +1172,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, (BaseKill || OffKill) && (TRI->regsOverlap(EvenReg, BaseReg))) { assert(!TRI->regsOverlap(OddReg, BaseReg)); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); + if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0) + NewOpc = ARM::t2LDRi12; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1167,12 +1191,16 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenDeadKill = false; OddDeadKill = true; } + // Never kill the base register in the first instruction. + // <rdar://problem/11101911> + if (EvenReg == BaseReg) + EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); @@ -1223,7 +1251,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { bool isKill = MO.isDef() ? false : MO.isKill(); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); int Offset = getMemoryOpOffset(MBBI); // Watch out for: // r4 := ldr [r5] @@ -1599,7 +1627,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (EvenReg == OddReg) return false; BaseReg = Op0->getOperand(1).getReg(); - Pred = llvm::getInstrPredicate(Op0, PredReg); + Pred = getInstrPredicate(Op0, PredReg); dl = Op0->getDebugLoc(); return true; } @@ -1796,7 +1824,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; - if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL) + if (getInstrPredicate(MI, PredReg) != ARMCC::AL) continue; int Opc = MI->getOpcode(); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 1327fb8..1466e98 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -314,7 +314,8 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1], def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, (interleave QPR, TuplesOE2D)> { // Allocate starting at non-VFP2 registers D16-D31 first. - let AltOrders = [(rotl DPair, 16)]; + // Prefer even-odd pairs as they are easier to copy. + let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))]; let AltOrderSelect = [{ return 1; }]; } diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 8d86c01..8b1fb93 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -324,6 +324,15 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<19, [A8_NPipe], 0>, InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<20, [A8_NPipe], 0>, @@ -860,6 +869,16 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, // + // Double-register Fused FP Multiple-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, + // + // Quad-register Fused FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, + // // Double-register Reciprical Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 49fedf6..0d710cc 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -604,6 +604,22 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_NPipe]>], [9, 1, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_NPipe]>], + [8, 1, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [9, 1, 1, 1]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, @@ -1697,6 +1713,26 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, // + // Double-register Fused FP Multiple-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [6, 3, 2, 1]>, + // + // Quad-register Fused FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe]>], + [8, 4, 2, 1]>, + // // Double-register Reciprical Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 4d959f5..0ace9bc 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -243,6 +243,12 @@ def ARMV6Itineraries : ProcessorItineraries< // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 1e8cda5..ca172ed 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -16,7 +16,6 @@ #include "llvm/GlobalValue.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -49,7 +48,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasVFPv3(false) , HasVFPv4(false) , HasNEON(false) - , HasNEON2(false) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) , HasVMLxForwarding(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3d9c03d..e72b06f 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -45,13 +45,12 @@ protected: bool HasV6T2Ops; bool HasV7Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; bool HasVFPv4; bool HasNEON; - bool HasNEON2; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to @@ -205,7 +204,6 @@ protected: bool hasVFP3() const { return HasVFPv3; } bool hasVFP4() const { return HasVFPv4; } bool hasNEON() const { return HasNEON; } - bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4); } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 44229ad..047efc2 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -158,8 +158,10 @@ bool ARMPassConfig::addPreRegAlloc() { bool ARMPassConfig::addPreSched2() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) + if (!getARMSubtarget().isThumb1Only()) { PM.add(createARMLoadStoreOptimizationPass()); + printAndVerify("After ARM load / store optimizer"); + } if (getARMSubtarget().hasNEON()) PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } @@ -192,7 +194,8 @@ bool ARMPassConfig::addPreEmitPass() { return true; } -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { +bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { // Machine code emitter pass for ARM. PM.add(createARMJITCodeEmitterPass(*this, JCE)); return false; diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp index eb8aaf2..fda8536 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -17,8 +17,6 @@ #include "llvm/Support/TargetRegistry.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include <string> diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 911eb13..2c53e3f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -82,8 +82,14 @@ class ARMAsmParser : public MCTargetAsmParser { MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + bool Warning(SMLoc L, const Twine &Msg, + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + return Parser.Warning(L, Msg, Ranges); + } + bool Error(SMLoc L, const Twine &Msg, + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + return Parser.Error(L, Msg, Ranges); + } int tryParseRegister(); bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); @@ -478,6 +484,8 @@ public: /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + ARMCC::CondCodes getCondCode() const { assert(Kind == k_CondCode && "Invalid access!"); return CC.Val; @@ -579,6 +587,14 @@ public: int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= 0 && Value <= 508; } + bool isImm0_508s4Neg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = -CE->getValue(); + // explicitly exclude zero. we want that to use the normal 0_508 version. + return ((Value & 3) == 0) && Value > 0 && Value <= 508; + } bool isImm0_255() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -586,6 +602,20 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 256; } + bool isImm0_4095() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 4096; + } + bool isImm0_4095Neg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = -CE->getValue(); + return Value > 0 && Value < 4096; + } bool isImm0_1() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -782,7 +812,9 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return ARM_AM::getSOImmVal(-Value) != -1; + // Only use this when not representable as a plain so_imm. + return ARM_AM::getSOImmVal(Value) == -1 && + ARM_AM::getSOImmVal(-Value) != -1; } bool isT2SOImm() const { if (!isImm()) return false; @@ -803,7 +835,9 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return ARM_AM::getT2SOImmVal(-Value) != -1; + // Only use this when not representable as a plain so_imm. + return ARM_AM::getT2SOImmVal(Value) == -1 && + ARM_AM::getT2SOImmVal(-Value) != -1; } bool isSetEndImm() const { if (!isImm()) return false; @@ -1495,6 +1529,14 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); } + void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate is scaled by four in the encoding and is stored + // in the MCInst as such. Lop off the low two bits here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-(CE->getValue() / 4))); + } + void addImm0_508s4Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate is scaled by four in the encoding and is stored @@ -1553,6 +1595,14 @@ public: Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); } + void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually an imm0_4095, but we have its + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a so_imm, but we have its bitwise @@ -3324,7 +3374,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { FlagsVal = 8; // No flag } } else if (SpecReg == "cpsr" || SpecReg == "spsr") { - if (Flags == "all") // cpsr_all is an alias for cpsr_fc + // cpsr_all is an alias for cpsr_fc, as is plain cpsr. + if (Flags == "all" || Flags == "") Flags = "fc"; for (int i = 0, e = Flags.size(); i != e; ++i) { unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1)) @@ -4475,22 +4526,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, case AsmToken::Dollar: case AsmToken::Hash: { // #42 -> immediate. - // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate S = Parser.getTok().getLoc(); Parser.Lex(); - bool isNegative = Parser.getTok().is(AsmToken::Minus); - const MCExpr *ImmVal; - if (getParser().ParseExpression(ImmVal)) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); - if (CE) { - int32_t Val = CE->getValue(); - if (isNegative && Val == 0) - ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); + + if (Parser.getTok().isNot(AsmToken::Colon)) { + bool isNegative = Parser.getTok().is(AsmToken::Minus); + const MCExpr *ImmVal; + if (getParser().ParseExpression(ImmVal)) + return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); + if (CE) { + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); + } + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); + return false; } - E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); - return false; + // w/ a ':' after the '#', it's just like a plain ':'. + // FALLTHROUGH } case AsmToken::Colon: { // ":lower16:" and ":upper16:" expression prefixes @@ -4616,6 +4671,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || + Mnemonic == "vfms" || Mnemonic == "vfnms" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4659,6 +4715,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, Mnemonic == "orr" || Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || + Mnemonic == "vfm" || Mnemonic == "vfnm" || (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" || Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull"))) { @@ -4727,7 +4784,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, static_cast<ARMOperand*>(Operands[4])->isReg() && static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP && static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - (static_cast<ARMOperand*>(Operands[5])->isReg() || + ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) || static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4())) return true; // For Thumb2, add/sub immediate does not have a cc_out operand for the @@ -4811,7 +4868,10 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, (Operands.size() == 5 || Operands.size() == 6) && static_cast<ARMOperand*>(Operands[3])->isReg() && static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && + (static_cast<ARMOperand*>(Operands[4])->isImm() || + (Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[5])->isImm()))) return true; return false; @@ -6602,6 +6662,37 @@ processInstruction(MCInst &Inst, return true; } + // Handle encoding choice for the shift-immediate instructions. + case ARM::t2LSLri: + case ARM::t2LSRri: + case ARM::t2ASRri: { + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !(static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case ARM::t2LSLri: NewOpc = ARM::tLSLri; break; + case ARM::t2LSRri: NewOpc = ARM::tLSRri; break; + case ARM::t2ASRri: NewOpc = ARM::tASRri; break; + } + // The Thumb1 operands aren't in the same order. Awesome, eh? + MCInst TmpInst; + TmpInst.setOpcode(NewOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + return false; + } + // Handle the Thumb2 mode MOV complex aliases. case ARM::t2MOVsr: case ARM::t2MOVSsr: { @@ -6833,7 +6924,7 @@ processInstruction(MCInst &Inst, // explicitly specified. From the ARM ARM: "Encoding T1 is preferred // to encoding T2 if <Rd> is specified and encoding T2 is preferred // to encoding T1 if <Rd> is omitted." - if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { + if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { Inst.setOpcode(ARM::tADDi3); return true; } @@ -6843,11 +6934,37 @@ processInstruction(MCInst &Inst, // explicitly specified. From the ARM ARM: "Encoding T1 is preferred // to encoding T2 if <Rd> is specified and encoding T2 is preferred // to encoding T1 if <Rd> is omitted." - if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { + if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { Inst.setOpcode(ARM::tSUBi3); return true; } break; + case ARM::t2ADDri: + case ARM::t2SUBri: { + // If the destination and first source operand are the same, and + // the flags are compatible with the current IT status, use encoding T2 + // instead of T3. For compatibility with the system 'as'. Make sure the + // wide encoding wasn't explicit. + if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + !isARMLowRegister(Inst.getOperand(0).getReg()) || + (unsigned)Inst.getOperand(2).getImm() > 255 || + ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || + (inITBlock() && Inst.getOperand(5).getReg() != 0)) || + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + break; + MCInst TmpInst; + TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ? + ARM::tADDi8 : ARM::tSUBi8); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } case ARM::t2ADDrr: { // If the destination and first source operand are the same, and // there's no setting of the flags, use encoding T2 instead of T3. @@ -6964,7 +7081,7 @@ processInstruction(MCInst &Inst, // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && - Inst.getOperand(1).getImm() <= 255 && + (unsigned)Inst.getOperand(1).getImm() <= 255 && ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR) || (inITBlock() && Inst.getOperand(4).getReg() == 0)) && @@ -7216,7 +7333,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, return Error(ErrorLoc, "invalid operand for instruction"); } case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction"); + return Error(IDLoc, "invalid instruction", + ((ARMOperand*)Operands[0])->getLocRange()); case Match_ConversionFail: // The converter function will have already emited a diagnostic. return true; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ce4587b..912935d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -9,8 +9,6 @@ #define DEBUG_TYPE "arm-disassembler" -#include "ARM.h" -#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -20,6 +18,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" @@ -103,228 +102,232 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { // Forward declare these because the autogenerated code will reference them. // Definitions are further down. -static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, +static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst, +static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst, unsigned Insn, uint64_t Adddress, const void *Decoder); -static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val, +static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); - - +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" #include "ARMGenInstrInfo.inc" #include "ARMGenEDInfo.inc" @@ -856,7 +859,7 @@ static const uint16_t GPRDecoderTable[] = { ARM::R12, ARM::SP, ARM::LR, ARM::PC }; -static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -867,7 +870,7 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -879,14 +882,14 @@ DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return S; } -static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { unsigned Register = 0; switch (RegNo) { @@ -916,7 +919,7 @@ static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); @@ -933,7 +936,7 @@ static const uint16_t SPRDecoderTable[] = { ARM::S28, ARM::S29, ARM::S30, ARM::S31 }; -static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -954,7 +957,7 @@ static const uint16_t DPRDecoderTable[] = { ARM::D28, ARM::D29, ARM::D30, ARM::D31 }; -static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -964,7 +967,7 @@ static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; @@ -972,7 +975,7 @@ static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -987,7 +990,7 @@ static const uint16_t QPRDecoderTable[] = { }; -static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -1007,7 +1010,7 @@ static const uint16_t DPairDecoderTable[] = { ARM::Q15 }; -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 30) return MCDisassembler::Fail; @@ -1028,7 +1031,7 @@ static const uint16_t DPairSpacedDecoderTable[] = { ARM::D28_D30, ARM::D29_D31 }; -static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { @@ -1040,7 +1043,7 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xF) return MCDisassembler::Fail; // AL predicate is not allowed on Thumb1 branches. @@ -1054,7 +1057,7 @@ static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val) Inst.addOperand(MCOperand::CreateReg(ARM::CPSR)); @@ -1063,7 +1066,7 @@ static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { uint32_t imm = Val & 0xFF; uint32_t rot = (Val & 0xF00) >> 7; @@ -1072,7 +1075,7 @@ static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1109,7 +1112,7 @@ static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1144,7 +1147,7 @@ static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1179,7 +1182,7 @@ static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1196,7 +1199,7 @@ static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1213,7 +1216,7 @@ static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { // This operand encodes a mask of contiguous zeros between a specified MSB // and LSB. To decode it, we create the mask of all bits MSB-and-lower, @@ -1234,7 +1237,7 @@ static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1379,7 +1382,7 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1482,7 +1485,7 @@ DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1523,7 +1526,7 @@ static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, +DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1536,6 +1539,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, unsigned pred = fieldFromInstruction32(Insn, 28, 4); unsigned W = fieldFromInstruction32(Insn, 21, 1); unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned Rt2 = Rt + 1; bool writeback = (W == 1) | (P == 0); @@ -1547,7 +1551,86 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: - if (Rt & 0x1) return MCDisassembler::Fail; + if (Rt & 0x1) S = MCDisassembler::SoftFail; + break; + default: + break; + } + switch (Inst.getOpcode()) { + case ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + if (P == 0 && W == 1) + S = MCDisassembler::SoftFail; + + if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2)) + S = MCDisassembler::SoftFail; + if (type && Rm == 15) + S = MCDisassembler::SoftFail; + if (Rt2 == 15) + S = MCDisassembler::SoftFail; + if (!type && fieldFromInstruction32(Insn, 8, 4)) + S = MCDisassembler::SoftFail; + break; + case ARM::STRH: + case ARM::STRH_PRE: + case ARM::STRH_POST: + if (Rt == 15) + S = MCDisassembler::SoftFail; + if (writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; + if (!type && Rm == 15) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + if (type && Rn == 15){ + if (Rt2 == 15) + S = MCDisassembler::SoftFail; + break; + } + if (P == 0 && W == 1) + S = MCDisassembler::SoftFail; + if (!type && (Rt2 == 15 || Rm == 15 || Rm == Rt || Rm == Rt2)) + S = MCDisassembler::SoftFail; + if (!type && writeback && Rn == 15) + S = MCDisassembler::SoftFail; + if (writeback && (Rn == Rt || Rn == Rt2)) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRH: + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + if (type && Rn == 15){ + if (Rt == 15) + S = MCDisassembler::SoftFail; + break; + } + if (Rt == 15) + S = MCDisassembler::SoftFail; + if (!type && Rm == 15) + S = MCDisassembler::SoftFail; + if (!type && writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRSH: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + case ARM::LDRSB: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + if (type && Rn == 15){ + if (Rt == 15) + S = MCDisassembler::SoftFail; + break; + } + if (type && (Rt == 15 || (writeback && Rn == Rt))) + S = MCDisassembler::SoftFail; + if (!type && (Rt == 15 || Rm == 15)) + S = MCDisassembler::SoftFail; + if (!type && writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; break; default: break; @@ -1634,7 +1717,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1663,7 +1746,7 @@ static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, +static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1748,7 +1831,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, return S; } -static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction32(Insn, 18, 2); unsigned M = fieldFromInstruction32(Insn, 17, 1); @@ -1788,7 +1871,7 @@ static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction32(Insn, 9, 2); unsigned M = fieldFromInstruction32(Insn, 8, 1); @@ -1828,7 +1911,7 @@ static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1852,7 +1935,7 @@ static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1878,7 +1961,7 @@ static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1906,7 +1989,7 @@ static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1926,7 +2009,7 @@ static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1945,13 +2028,28 @@ static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); } static DecodeStatus -DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2BInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) | + (fieldFromInstruction32(Insn, 11, 1) << 18) | + (fieldFromInstruction32(Insn, 13, 1) << 17) | + (fieldFromInstruction32(Insn, 16, 6) << 11) | + (fieldFromInstruction32(Insn, 26, 1) << 19); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1))); + return S; +} + +static DecodeStatus +DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1977,7 +2075,7 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1994,7 +2092,7 @@ static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2183,6 +2281,8 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD2b8wb_register: case ARM::VLD2b16wb_register: case ARM::VLD2b32wb_register: + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: @@ -2251,12 +2351,22 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; break; + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + break; } return S; } -static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2319,6 +2429,8 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST2b8wb_register: case ARM::VST2b16wb_register: case ARM::VST2b32wb_register: + if (Rm == 0xF) + return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(0)); break; case ARM::VST3d8_UPD: @@ -2525,7 +2637,7 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2570,7 +2682,7 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2580,7 +2692,6 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); - unsigned pred = fieldFromInstruction32(Insn, 22, 4); align *= 2*size; switch (Inst.getOpcode()) { @@ -2611,20 +2722,15 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { + if (Rm != 0xD && Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; } - if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) - return MCDisassembler::Fail; - return S; } -static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2659,7 +2765,7 @@ static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2712,7 +2818,7 @@ static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2757,7 +2863,7 @@ DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2776,31 +2882,31 @@ static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(8 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(16 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(32 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(64 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2836,7 +2942,7 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2860,25 +2966,31 @@ static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, return S; } -static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2893,7 +3005,7 @@ static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2907,7 +3019,7 @@ static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { unsigned imm = Val << 2; @@ -2917,7 +3029,7 @@ static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateReg(ARM::SP)); Inst.addOperand(MCOperand::CreateImm(Val)); @@ -2925,7 +3037,7 @@ static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2942,7 +3054,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2997,7 +3109,7 @@ static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { int imm = Val & 0xFF; if (!(Val & 0x100)) imm *= -1; @@ -3006,7 +3118,7 @@ static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3021,7 +3133,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3036,7 +3148,7 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, return S; } -static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { int imm = Val & 0xFF; if (Val == 0) @@ -3049,7 +3161,7 @@ static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, } -static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3080,7 +3192,7 @@ static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3110,7 +3222,7 @@ static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3125,7 +3237,7 @@ static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, } -static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { unsigned imm = fieldFromInstruction16(Insn, 0, 7); @@ -3136,7 +3248,7 @@ static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3161,7 +3273,7 @@ static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, return S; } -static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2; unsigned flags = fieldFromInstruction16(Insn, 0, 3); @@ -3172,20 +3284,20 @@ static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned add = fieldFromInstruction32(Insn, 4, 1); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(add)); return S; } -static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (!tryAddingSymbolicOperand(Address, (Address & ~2u) + SignExtend32<22>(Val << 1) + 4, @@ -3194,7 +3306,7 @@ static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xA || Val == 0xB) return MCDisassembler::Fail; @@ -3204,7 +3316,7 @@ static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, +DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3220,7 +3332,7 @@ DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3262,7 +3374,7 @@ DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, // Decode a shifted immediate operand. These basically consist // of an 8-bit value, and a 4-bit directive that specifies either // a splat operation or a rotation. -static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { unsigned ctrl = fieldFromInstruction32(Val, 10, 2); if (ctrl == 0) { @@ -3294,13 +3406,15 @@ static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val, +DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - Inst.addOperand(MCOperand::CreateImm(Val << 1)); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, true, 4, Inst, Decoder)) @@ -3308,7 +3422,7 @@ static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { switch (Val) { default: @@ -3328,14 +3442,14 @@ static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (!Val) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3358,7 +3472,7 @@ static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder){ DecodeStatus S = MCDisassembler::Success; @@ -3385,7 +3499,7 @@ static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3410,7 +3524,7 @@ static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3438,7 +3552,7 @@ static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3463,7 +3577,7 @@ static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3488,7 +3602,7 @@ static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3547,7 +3661,7 @@ static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3605,7 +3719,7 @@ static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3672,7 +3786,7 @@ static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3736,7 +3850,7 @@ static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3806,7 +3920,7 @@ static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3870,7 +3984,7 @@ static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3944,7 +4058,7 @@ static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4009,7 +4123,7 @@ static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); @@ -4035,7 +4149,7 @@ static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); @@ -4061,7 +4175,7 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction16(Insn, 4, 4); @@ -4088,7 +4202,7 @@ static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4125,7 +4239,7 @@ DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4159,7 +4273,7 @@ DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, +static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address, const void *Decoder) { unsigned sign1 = fieldFromInstruction32(Insn, 21, 1); unsigned sign2 = fieldFromInstruction32(Insn, 23, 1); @@ -4174,7 +4288,7 @@ static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, +static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4185,7 +4299,7 @@ static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, return S; } -static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Rt = fieldFromInstruction32(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4); @@ -4196,6 +4310,10 @@ static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, return DecodeCPSInstruction(Inst, Insn, Address, Decoder); DecodeStatus S = MCDisassembler::Success; + + if (Rt == Rn || Rn == Rt2) + S = MCDisassembler::SoftFail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) @@ -4208,7 +4326,7 @@ static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); @@ -4236,7 +4354,7 @@ static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); @@ -4263,3 +4381,59 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, return S; } + +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 16, 4); + unsigned Rt = fieldFromInstruction32(Val, 12, 4); + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + Rm |= (fieldFromInstruction32(Val, 23, 1) << 4); + unsigned Cond = fieldFromInstruction32(Val, 28, 4); + + if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt) + S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, Cond, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + + DecodeStatus S = MCDisassembler::Success; + + unsigned CRm = fieldFromInstruction32(Val, 0, 4); + unsigned opc1 = fieldFromInstruction32(Val, 4, 4); + unsigned cop = fieldFromInstruction32(Val, 8, 4); + unsigned Rt = fieldFromInstruction32(Val, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Val, 16, 4); + + if ((cop & ~0x1) == 0xa) + return MCDisassembler::Fail; + + if (Rt == Rt2) + S = MCDisassembler::SoftFail; + + Inst.addOperand(MCOperand::CreateImm(cop)); + Inst.addOperand(MCOperand::CreateImm(opc1)); + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(CRm)); + + return S; +} + diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index 94075a9..52d8338 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMDisassembler parent = ARM -required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support +required_libraries = ARMDesc ARMInfo MC Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2b994df..cbd81c1 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -18,11 +18,11 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "ARMGenAsmWriter.inc" /// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing. @@ -36,17 +36,14 @@ static unsigned translateShiftImm(unsigned imm) { ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MRI) { + MCInstPrinter(MAI, MII, MRI) { // Initialize the set of available features. setAvailableFeatures(STI.getFeatureBits()); } -StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } @@ -212,12 +209,12 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); // If a symbolic branch target was added as a constant expression then print - // that address in hex. + // that address in hex. And only print 32 unsigned bits for the address. const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); int64_t Address; if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { O << "0x"; - O.write_hex(Address); + O.write_hex((uint32_t)Address); } else { // Otherwise, just print the expression. diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index e9cd407..8acb7ee 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -23,15 +23,12 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI); + ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - static const char *getInstructionName(unsigned Opcode); - // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 25849ee..d10bfc1 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -11,11 +11,11 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" @@ -78,7 +78,8 @@ public: { "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_bl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, @@ -115,6 +116,9 @@ public: // twiddled. if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 && + (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { if (A) { const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); @@ -128,7 +132,8 @@ public: if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl || (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_bl)) + (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || + (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) IsResolved = false; } @@ -366,7 +371,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. @@ -466,7 +472,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { Value = -Value; isAdd = false; } + // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8]. assert ((Value < 256) && "Out of range pc-relative fixup value!"); + Value = (Value & 0xf) | ((Value & 0xf0) << 4); return Value | (isAdd << 23); } case ARM::fixup_arm_pcrel_10: @@ -577,7 +585,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 5476a46..aa649ba 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -177,7 +178,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; } break; - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_blx: case ARM::fixup_arm_uncondbranch: switch (Modifier) { @@ -189,6 +190,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; } break; + case ARM::fixup_arm_condbl: case ARM::fixup_arm_condbranch: Type = ELF::R_ARM_JUMP24; break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 1827986..0085feb 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -59,8 +59,21 @@ enum Fixups { // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. fixup_arm_thumb_br, - // fixup_arm_bl - Fixup for ARM BL instructions. - fixup_arm_bl, + // The following fixups handle the ARM BL instructions. These can be + // conditionalised; however, the ARM ELF ABI requires a different relocation + // in that case: R_ARM_JUMP24 instead of R_ARM_CALL. The difference is that + // R_ARM_CALL is allowed to change the instruction to a BLX inline, which has + // no conditional version; R_ARM_JUMP24 would have to insert a veneer. + // + // MachO does not draw a distinction between the two cases, so it will treat + // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups. + + // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions. + fixup_arm_uncondbl, + + // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial + // conditionalisation. + fixup_arm_condbl, // fixup_arm_blx - Fixup for ARM BLX instructions. fixup_arm_blx, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 4445dcd..10d1c48 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -597,8 +597,12 @@ uint32_t ARMMCCodeEmitter:: getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand MO = MI.getOperand(OpIdx); - if (MO.isExpr()) - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_bl, Fixups); + if (MO.isExpr()) { + if (HasConditionalBranch(MI)) + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_condbl, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups); + } return MO.getImm() >> 2; } @@ -1330,8 +1334,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, // LDM/STM: // {15-0} = Bitfield of GPRs. unsigned Reg = MI.getOperand(Op).getReg(); - bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); - bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); + bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); + bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); unsigned Binary = 0; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index ed27f9f..e3512cd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -163,10 +163,11 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createARMMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, MRI, STI); + return new ARMInstPrinter(MAI, MII, MRI, STI); return 0; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 9d3da14..8057cb6 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -82,7 +82,8 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_adr_pcrel_12: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: RelocType = unsigned(macho::RIT_ARM_Branch24Bit); // Report as 'long', even though that is not quite accurate. diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 4fcaecf..3eddda8 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -501,11 +501,6 @@ those operations and the ARMv6 scalar versions. //===---------------------------------------------------------------------===// -ARM::MOVCCr is commutable (by flipping the condition). But we need to implement -ARMInstrInfo::commuteInstruction() to support it. - -//===---------------------------------------------------------------------===// - Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g. ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg) diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 8cf7cac..e03e758 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInst.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 27fce9b..36af204 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -17,7 +17,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "Thumb1RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index def75dd..ecb4c2f 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -154,7 +154,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, ++I; if (I != E) { unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); + ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg); if (NCC == CC || NCC == OCC) return true; } @@ -171,7 +171,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MachineInstr *MI = &*MBBI; DebugLoc dl = MI->getDebugLoc(); unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); if (CC == ARMCC::AL) { ++MBBI; continue; @@ -207,7 +207,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MI = NMI; unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg); + ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg); if (NCC == CC || NCC == OCC) { Mask |= (NCC & 1) << Pos; // Add implicit use of ITSTATE. diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 2fe4b85..8ab486b 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" @@ -59,7 +58,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // If the first instruction of Tail is predicated, we may have to update // the IT instruction. unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg); + ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg); MachineBasicBlock::iterator MBBI = Tail; if (CC != ARMCC::AL) // Expecting at least the t2IT instruction before it. @@ -107,7 +106,7 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, } unsigned PredReg = 0; - return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; + return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; } void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -574,7 +573,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, return; unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg); + ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg); if (CC == ARMCC::AL || PredReg != ARM::CPSR) return; @@ -590,7 +589,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, continue; MachineInstr *NMI = &*MBBI; - ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg); + ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg); if (!(NCC == CC || NCC == OCC) || NMI->modifiesRegister(SrcReg, &TRI) || NMI->modifiesRegister(ARM::CPSR, &TRI)) @@ -611,5 +610,5 @@ llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { unsigned Opc = MI->getOpcode(); if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) return ARMCC::AL; - return llvm::getInstrPredicate(MI, PredReg); + return getInstrPredicate(MI, PredReg); } diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 1ae2ef1..0911f8a 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -15,9 +15,8 @@ #define THUMB2INSTRUCTIONINFO_H #include "ARM.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "Thumb2RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index fb9d93b..b5a397e 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -851,7 +851,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // If this BB loops back to itself, conservatively avoid narrowing the // first instruction that does partial flag update. bool IsSelfLoop = MBB.isSuccessor(&MBB); - MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end(); + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end(); MachineBasicBlock::instr_iterator NextMII; for (; MII != E; MII = NextMII) { NextMII = llvm::next(MII); |