Diffstat (limited to 'lib/Target/ARM')
46 files changed, 3610 insertions, 1515 deletions
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index bbca228..6ae287a 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -493,11 +493,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
     return false;
   }
 
-  // These modifiers are not yet supported.
-  case 'p': // The high single-precision register of a VFP double-precision
-            // register.
   case 'e': // The low doubleword register of a NEON quad register.
-  case 'f': // The high doubleword register of a NEON quad register.
+  case 'f': { // The high doubleword register of a NEON quad register.
+    if (!MI->getOperand(OpNum).isReg())
+      return true;
+    unsigned Reg = MI->getOperand(OpNum).getReg();
+    if (!ARM::QPRRegClass.contains(Reg))
+      return true;
+    const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+    unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+                                     ARM::dsub_0 : ARM::dsub_1);
+    O << ARMInstPrinter::getRegisterName(SubReg);
+    return false;
+  }
+
+  // These modifiers are not yet supported.
   case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
   case 'H': // The highest-numbered register of a pair.
     return true;
@@ -739,14 +749,14 @@ void ARMAsmPrinter::emitAttributes() {
   }
 
   // Signal various FP modes.
-  if (!UnsafeFPMath) {
+  if (!TM.Options.UnsafeFPMath) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
                                ARMBuildAttrs::Allowed);
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
                                ARMBuildAttrs::Allowed);
   }
 
-  if (NoInfsFPMath && NoNaNsFPMath)
+  if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
                                ARMBuildAttrs::Allowed);
   else
@@ -759,7 +769,7 @@ void ARMAsmPrinter::emitAttributes() {
   AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
 
   // Hard float.  Use both S and D registers and conform to AAPCS-VFP.
-  if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+  if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
   }
@@ -1069,7 +1079,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
   }
 
   // Try to figure out the unwinding opcode out of src / dst regs.
-  if (MI->getDesc().mayStore()) {
+  if (MI->mayStore()) {
     // Register saves.
     assert(DstReg == ARM::SP &&
            "Only stack pointer as a destination reg is supported");
@@ -1481,11 +1491,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     /// in the function.  The first operand is the ID# for this instruction, the
     /// second is the index into the MachineConstantPool that this is, the third
     /// is the size in bytes of this constant pool entry.
+    /// The required alignment is specified on the basic block holding this MI.
     unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
     unsigned CPIdx   = (unsigned)MI->getOperand(1).getIndex();
 
-    EmitAlignment(2);
-
     // Mark the constant pool entry as data if we're not already in a data
     // region.
     OutStreamer.EmitDataRegion();
@@ -1934,4 +1943,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() {
   RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
   RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
 }
-
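The first hunk above turns two previously unsupported inline-assembly operand modifiers into working ones: applied to a NEON quad (Q) register operand, %e prints its low doubleword (D) sub-register and %f its high one. A minimal usage sketch of what user code can now do — the function below is illustrative, not part of the patch:

    #include <arm_neon.h>

    // %e0 / %f0 print the low / high D register of the Q register that
    // the compiler allocates for operand 0 ("w" = VFP/NEON register).
    float32x4_t double_both_halves(float32x4_t q) {
      __asm__("vadd.f32 %e0, %e0, %e0\n\t"
              "vadd.f32 %f0, %f0, %f0"
              : "+w"(q));
      return q;
    }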
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9315348..8bf5475 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -146,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
   const MCInstrDesc &MCID = MI->getDesc();
   unsigned NumOps = MCID.getNumOperands();
-  bool isLoad = !MCID.mayStore();
+  bool isLoad = !MI->mayStore();
   const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
   const MachineOperand &Base = MI->getOperand(2);
   const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -439,6 +439,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   return false;
 }
 
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+  if (MI->isBundle()) {
+    MachineBasicBlock::const_instr_iterator I = MI;
+    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+    while (++I != E && I->isInsideBundle()) {
+      int PIdx = I->findFirstPredOperandIdx();
+      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+        return true;
+    }
+    return false;
+  }
+
+  int PIdx = MI->findFirstPredOperandIdx();
+  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
 bool ARMBaseInstrInfo::
 PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const {
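Note on the isPredicated() change above: a BUNDLE header instruction carries no predicate operand of its own, so the query now walks the bundled instructions and answers true if any member is predicated. The same const_instr_iterator walk is the general idiom for any whole-bundle query; a hedged sketch (the helper name is illustrative, not part of the patch):

    // Count the real instructions inside a finalized bundle.
    static unsigned countBundledInstrs(const MachineInstr *MI) {
      assert(MI->isBundle() && "expected a BUNDLE header");
      MachineBasicBlock::const_instr_iterator I = MI;
      MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
      unsigned Count = 0;
      while (++I != E && I->isInsideBundle())
        ++Count;
      return Count;
    }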
@@ -491,7 +507,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                     std::vector<MachineOperand> &Pred) const {
   // FIXME: This confuses implicit_def with optional CPSR def.
   const MCInstrDesc &MCID = MI->getDesc();
-  if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
+  if (!MCID.getImplicitDefs() && !MI->hasOptionalDef())
     return false;
 
   bool Found = false;
@@ -510,11 +526,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
 /// By default, this returns true for every instruction with a
 /// PredicateOperand.
 bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
-  const MCInstrDesc &MCID = MI->getDesc();
-  if (!MCID.isPredicable())
+  if (!MI->isPredicable())
     return false;
 
-  if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+  if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
     ARMFunctionInfo *AFI =
       MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
     return AFI->isThumb2Function();
@@ -548,7 +563,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
     return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
   if (MI->isLabel())
     return 0;
-  unsigned Opc = MI->getOpcode(); 
+  unsigned Opc = MI->getOpcode();
   switch (Opc) {
   case TargetOpcode::IMPLICIT_DEF:
   case TargetOpcode::KILL:
@@ -556,6 +571,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case TargetOpcode::EH_LABEL:
   case TargetOpcode::DBG_VALUE:
     return 0;
+  case TargetOpcode::BUNDLE:
+    return getInstBundleLength(MI);
   case ARM::MOVi16_ga_pcrel:
   case ARM::MOVTi16_ga_pcrel:
   case ARM::t2MOVi16_ga_pcrel:
@@ -593,7 +610,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
       ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
     unsigned NumOps = MCID.getNumOperands();
     MachineOperand JTOP =
-      MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
+      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
     unsigned JTI = JTOP.getIndex();
     const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
     assert(MJTI != 0);
@@ -622,6 +639,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   return 0; // Not reached
 }
 
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+  unsigned Size = 0;
+  MachineBasicBlock::const_instr_iterator I = MI;
+  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+  while (++I != E && I->isInsideBundle()) {
+    assert(!I->isBundle() && "No nested bundle!");
+    Size += GetInstSizeInBytes(&*I);
+  }
+  return Size;
+}
+
 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DestReg, unsigned SrcReg,
@@ -845,7 +873,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                     int &FrameIndex) const {
   const MachineMemOperand *Dummy;
-  return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+  return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
 }
 
 void ARMBaseInstrInfo::
@@ -991,7 +1019,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                      int &FrameIndex) const {
   const MachineMemOperand *Dummy;
-  return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+  return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
 }
 
 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
@@ -1357,7 +1385,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
     return false;
 
   // Terminators and labels can't be scheduled around.
-  if (MI->getDesc().isTerminator() || MI->isLabel())
+  if (MI->isTerminator() || MI->isLabel())
     return true;
 
   // Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1762,8 +1790,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
 
   // Check that CPSR isn't set between the comparison instruction and the one we
   // want to change.
-  MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
-    B = MI->getParent()->begin();
+  MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
 
   // Early exit if CmpInstr is at the beginning of the BB.
   if (I == B) return false;
@@ -1957,7 +1984,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
     bool isKill = UseMI->getOperand(OpIdx).isKill();
     unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
     AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
-                                        *UseMI, UseMI->getDebugLoc(),
+                                        UseMI, UseMI->getDebugLoc(),
                                         get(NewUseOpc), NewReg)
                                 .addReg(Reg1, getKillRegState(isKill))
                                 .addImm(SOImmValV1)));
@@ -2332,6 +2359,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   return UseCycle;
 }
 
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+                                           const MachineInstr *MI, unsigned Reg,
+                                           unsigned &DefIdx, unsigned &Dist) {
+  Dist = 0;
+
+  MachineBasicBlock::const_iterator I = MI; ++I;
+  MachineBasicBlock::const_instr_iterator II =
+    llvm::prior(I.getInstrIterator());
+  assert(II->isInsideBundle() && "Empty bundle?");
+
+  int Idx = -1;
+  while (II->isInsideBundle()) {
+    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+    if (Idx != -1)
+      break;
+    --II;
+    ++Dist;
+  }
+
+  assert(Idx != -1 && "Cannot find bundled definition!");
+  DefIdx = Idx;
+  return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+                                           const MachineInstr *MI, unsigned Reg,
+                                           unsigned &UseIdx, unsigned &Dist) {
+  Dist = 0;
+
+  MachineBasicBlock::const_instr_iterator II = MI; ++II;
+  assert(II->isInsideBundle() && "Empty bundle?");
+  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+  // FIXME: This doesn't properly handle multiple uses.
+  int Idx = -1;
+  while (II != E && II->isInsideBundle()) {
+    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+    if (Idx != -1)
+      break;
+    if (II->getOpcode() != ARM::t2IT)
+      ++Dist;
+    ++II;
+  }
+
+  if (Idx == -1) {
+    Dist = 0;
+    return 0;
+  }
+
+  UseIdx = Idx;
+  return II;
+}
+
 int
 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                     const MachineInstr *DefMI, unsigned DefIdx,
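getBundledDefMI() and getBundledUseMI() above let a latency query against a BUNDLE header resolve to the real def or use instruction inside the bundle; Dist counts how many bundled instructions (ignoring t2IT) separate it from the bundle boundary, and the hunk below subtracts those counts from the raw operand latency. Illustrative numbers only:

    // If the def sits 2 instructions before the end of its bundle
    // (DefAdj == 2) and the use is found immediately (UseAdj == 0),
    // a raw latency of 3 becomes 3 - (2 + 0) == 1, clamped to >= 1.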
@@ -2340,35 +2420,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
       DefMI->isRegSequence() || DefMI->isImplicitDef())
     return 1;
 
-  const MCInstrDesc &DefMCID = DefMI->getDesc();
   if (!ItinData || ItinData->isEmpty())
-    return DefMCID.mayLoad() ? 3 : 1;
+    return DefMI->mayLoad() ? 3 : 1;
 
-  const MCInstrDesc &UseMCID = UseMI->getDesc();
+  const MCInstrDesc *DefMCID = &DefMI->getDesc();
+  const MCInstrDesc *UseMCID = &UseMI->getDesc();
   const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
-  if (DefMO.getReg() == ARM::CPSR) {
+  unsigned Reg = DefMO.getReg();
+  if (Reg == ARM::CPSR) {
     if (DefMI->getOpcode() == ARM::FMSTAT) {
       // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
       return Subtarget.isCortexA9() ? 1 : 20;
     }
 
     // CPSR set and branch can be paired in the same cycle.
-    if (UseMCID.isBranch())
+    if (UseMI->isBranch())
       return 0;
+
+    // Otherwise it takes the instruction latency (generally one).
+    int Latency = getInstrLatency(ItinData, DefMI);
+
+    // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+    // its uses. Instructions which are otherwise scheduled between them may
+    // incur a code size penalty (not able to use the CPSR setting 16-bit
+    // instructions).
+    if (Latency > 0 && Subtarget.isThumb2()) {
+      const MachineFunction *MF = DefMI->getParent()->getParent();
+      if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+        --Latency;
+    }
+    return Latency;
   }
 
   unsigned DefAlign = DefMI->hasOneMemOperand() ?
     (*DefMI->memoperands_begin())->getAlignment() : 0;
   unsigned UseAlign = UseMI->hasOneMemOperand() ?
     (*UseMI->memoperands_begin())->getAlignment() : 0;
-  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
-                                  UseMCID, UseIdx, UseAlign);
+
+  unsigned DefAdj = 0;
+  if (DefMI->isBundle()) {
+    DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+    if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+        DefMI->isRegSequence() || DefMI->isImplicitDef())
+      return 1;
+    DefMCID = &DefMI->getDesc();
+  }
+  unsigned UseAdj = 0;
+  if (UseMI->isBundle()) {
+    unsigned NewUseIdx;
+    const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+                                                   Reg, NewUseIdx, UseAdj);
+    if (NewUseMI) {
+      UseMI = NewUseMI;
+      UseIdx = NewUseIdx;
+      UseMCID = &UseMI->getDesc();
+    }
+  }
+
+  int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+                                  *UseMCID, UseIdx, UseAlign);
+  int Adj = DefAdj + UseAdj;
+  if (Adj) {
+    Latency -= (int)(DefAdj + UseAdj);
+    if (Latency < 1)
+      return 1;
+  }
 
   if (Latency > 1 &&
       (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
     // variants are one cycle cheaper.
-    switch (DefMCID.getOpcode()) {
+    switch (DefMCID->getOpcode()) {
     default: break;
     case ARM::LDRrs:
     case ARM::LDRBrs: {
@@ -2393,7 +2515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   }
 
   if (DefAlign < 8 && Subtarget.isCortexA9())
-    switch (DefMCID.getOpcode()) {
+    switch (DefMCID->getOpcode()) {
     default: break;
    case ARM::VLD1q8:
    case ARM::VLD1q16:
@@ -2413,12 +2535,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD2q8:
    case ARM::VLD2q16:
    case ARM::VLD2q32:
-   case ARM::VLD2d8_UPD:
-   case ARM::VLD2d16_UPD:
-   case ARM::VLD2d32_UPD:
-   case ARM::VLD2q8_UPD:
-   case ARM::VLD2q16_UPD:
-   case ARM::VLD2q32_UPD:
+   case ARM::VLD2d8wb_fixed:
+   case ARM::VLD2d16wb_fixed:
+   case ARM::VLD2d32wb_fixed:
+   case ARM::VLD2q8wb_fixed:
+   case ARM::VLD2q16wb_fixed:
+   case ARM::VLD2q32wb_fixed:
+   case ARM::VLD2d8wb_register:
+   case ARM::VLD2d16wb_register:
+   case ARM::VLD2d32wb_register:
+   case ARM::VLD2q8wb_register:
+   case ARM::VLD2q16wb_register:
+   case ARM::VLD2q32wb_register:
    case ARM::VLD3d8:
    case ARM::VLD3d16:
    case ARM::VLD3d32:
@@ -2446,9 +2574,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD1DUPq8:
    case ARM::VLD1DUPq16:
    case ARM::VLD1DUPq32:
-   case ARM::VLD1DUPq8_UPD:
-   case ARM::VLD1DUPq16_UPD:
-   case ARM::VLD1DUPq32_UPD:
+   case ARM::VLD1DUPq8wb_fixed:
+   case ARM::VLD1DUPq16wb_fixed:
+   case ARM::VLD1DUPq32wb_fixed:
+   case ARM::VLD1DUPq8wb_register:
+   case ARM::VLD1DUPq16wb_register:
+   case ARM::VLD1DUPq32wb_register:
    case ARM::VLD2DUPd8:
    case ARM::VLD2DUPd16:
    case ARM::VLD2DUPd32:
@@ -2580,12 +2711,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD2q8Pseudo:
    case ARM::VLD2q16Pseudo:
    case ARM::VLD2q32Pseudo:
-   case ARM::VLD2d8Pseudo_UPD:
-   case ARM::VLD2d16Pseudo_UPD:
-   case ARM::VLD2d32Pseudo_UPD:
-   case ARM::VLD2q8Pseudo_UPD:
-   case ARM::VLD2q16Pseudo_UPD:
-   case ARM::VLD2q32Pseudo_UPD:
+   case ARM::VLD2d8PseudoWB_fixed:
+   case ARM::VLD2d16PseudoWB_fixed:
+   case ARM::VLD2d32PseudoWB_fixed:
+   case ARM::VLD2q8PseudoWB_fixed:
+   case ARM::VLD2q16PseudoWB_fixed:
+   case ARM::VLD2q32PseudoWB_fixed:
+   case ARM::VLD2d8PseudoWB_register:
+   case ARM::VLD2d16PseudoWB_register:
+   case ARM::VLD2d32PseudoWB_register:
+   case ARM::VLD2q8PseudoWB_register:
+   case ARM::VLD2q16PseudoWB_register:
+   case ARM::VLD2q32PseudoWB_register:
    case ARM::VLD3d8Pseudo:
    case ARM::VLD3d16Pseudo:
    case ARM::VLD3d32Pseudo:
@@ -2621,9 +2758,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD1DUPq8Pseudo:
    case ARM::VLD1DUPq16Pseudo:
    case ARM::VLD1DUPq32Pseudo:
-   case ARM::VLD1DUPq8Pseudo_UPD:
-   case ARM::VLD1DUPq16Pseudo_UPD:
-   case ARM::VLD1DUPq32Pseudo_UPD:
+   case ARM::VLD1DUPq8PseudoWB_fixed:
+   case ARM::VLD1DUPq16PseudoWB_fixed:
+   case ARM::VLD1DUPq32PseudoWB_fixed:
+   case ARM::VLD1DUPq8PseudoWB_register:
+   case ARM::VLD1DUPq16PseudoWB_register:
+   case ARM::VLD1DUPq32PseudoWB_register:
    case ARM::VLD2DUPd8Pseudo:
    case ARM::VLD2DUPd16Pseudo:
    case ARM::VLD2DUPd32Pseudo:
@@ -2671,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   return Latency;
 }
 
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+                                   const MachineInstr *DefMI, unsigned DefIdx,
+                                   const MachineInstr *DepMI) const {
+  unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+  if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+    return 1;
+
+  // If the second MI is predicated, then there is an implicit use dependency.
+  return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+                           DepMI->getNumOperands());
+}
+
 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                       const MachineInstr *MI,
                                       unsigned *PredCost) const {
@@ -2681,6 +2834,17 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   if (!ItinData || ItinData->isEmpty())
     return 1;
 
+  if (MI->isBundle()) {
+    int Latency = 0;
+    MachineBasicBlock::const_instr_iterator I = MI;
+    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+    while (++I != E && I->isInsideBundle()) {
+      if (I->getOpcode() != ARM::t2IT)
+        Latency += getInstrLatency(ItinData, I, PredCost);
+    }
+    return Latency;
+  }
+
   const MCInstrDesc &MCID = MI->getDesc();
   unsigned Class = MCID.getSchedClass();
   unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
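getOutputLatency() above refines write-after-write dependencies: normally they cost one cycle, but a predicated second writer implicitly reads the register (it must preserve it when the predicate is false), so the def-to-use operand latency applies instead. An illustrative ARM sequence, not taken from the patch:

    // adds  r0, r1, r2   ; defines r0
    // moveq r0, #0       ; predicated: implicitly depends on the old r0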
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0f9f321..68e8208 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -69,10 +69,7 @@ public:
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
   // Predication support.
-  bool isPredicated(const MachineInstr *MI) const {
-    int PIdx = MI->findFirstPredOperandIdx();
-    return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
-  }
+  bool isPredicated(const MachineInstr *MI) const;
 
   ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
     int PIdx = MI->findFirstPredOperandIdx();
@@ -213,12 +210,18 @@ public:
                              SDNode *DefNode, unsigned DefIdx,
                              SDNode *UseNode, unsigned UseIdx) const;
 
+  virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+                                    const MachineInstr *DefMI, unsigned DefIdx,
+                                    const MachineInstr *DepMI) const;
+
   /// VFP/NEON execution domains.
   std::pair<uint16_t, uint16_t>
   getExecutionDomain(const MachineInstr *MI) const;
   void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
 
 private:
+  unsigned getInstBundleLength(const MachineInstr *MI) const;
+
   int getVLDMDefCycle(const InstrItineraryData *ItinData,
                       const MCInstrDesc &DefMCID,
                       unsigned DefClass,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 7c42342..8ee6ce2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -631,7 +631,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
   // 1. Dynamic stack realignment is explicitly disabled,
   // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
   // 3. There are VLAs in the function and the base pointer is disabled.
-  return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+  return (MF.getTarget().Options.RealignStack && !AFI->isThumb1OnlyFunction() &&
           (!MFI->hasVarSizedObjects() || EnableBasePointer));
 }
 
@@ -649,7 +649,7 @@ needsStackRealignment(const MachineFunction &MF) const {
 bool ARMBaseRegisterInfo::
 cannotEliminateFrame(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-  if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+  if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
     return true;
   return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
     || needsStackRealignment(MF);
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index d74ccfa..365f0bb 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -401,7 +401,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
   for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
        MBB != E; ++MBB) {
     MCE.StartMachineBasicBlock(MBB);
-    for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
          I != E; ++I)
       emitInstruction(*I);
   }
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 3e3a413..2039d41 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -51,6 +52,43 @@ static cl::opt<bool>
 AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
           cl::desc("Adjust basic block layout to better use TB[BH]"));
 
+static cl::opt<bool>
+AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
+          cl::desc("Align constant islands in code"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits.  This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+  if (KnownBits < LogAlign)
+    return (1u << LogAlign) - (1u << KnownBits);
+  return 0;
+}
+
+/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
+/// add padding such that:
+///
+/// 1. The result is aligned to 1 << LogAlign.
+///
+/// 2. No other value of the unknown bits would require more padding.
+///
+/// This may add more padding than is required to satisfy just one of the
+/// constraints.  It is necessary to compute alignment this way to guarantee
+/// that we don't underestimate the padding before an aligned block.  If the
+/// real padding before a block is larger than we think, constant pool entries
+/// may go out of range.
+static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
+                                      unsigned KnownBits) {
+  // Add the worst possible padding that the unknown bits could cause.
+  Offset += UnknownPadding(LogAlign, KnownBits);
+
+  // Then align the result.
+  return RoundUpToAlignment(Offset, 1u << LogAlign);
+}
+
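A quick worked example for the two helpers above, with illustrative values: LogAlign = 3 (8-byte alignment) and KnownBits = 1 means only the low offset bit is trusted, so up to 6 bytes of hidden padding must be assumed:

    // UnknownPadding(3, 1)       == (1u << 3) - (1u << 1)            == 6
    // WorstCaseAlign(0x34, 3, 1) == RoundUpToAlignment(0x34 + 6, 8)  == 0x40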
 namespace {
   /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
   /// requires constant pool entries to be scattered among the instructions
   /// CPE - A constant pool entry that has been placed somewhere, which
   /// tracks a list of users.
  class ARMConstantIslands : public MachineFunctionPass {
-    /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
-    /// by MBB Number.  The two-byte pads required for Thumb alignment are
-    /// counted as part of the following block (i.e., the offset and size for
-    /// a padded block will both be ==2 mod 4).
-    std::vector<unsigned> BBSizes;
+    /// BasicBlockInfo - Information about the offset and size of a single
+    /// basic block.
+    struct BasicBlockInfo {
+      /// Offset - Distance from the beginning of the function to the beginning
+      /// of this basic block.
+      ///
+      /// The offset is always aligned as required by the basic block.
+      unsigned Offset;
+
+      /// Size - Size of the basic block in bytes.  If the block contains
+      /// inline assembly, this is a worst case estimate.
+      ///
+      /// The size does not include any alignment padding whether from the
+      /// beginning of the block, or from an aligned jump table at the end.
+      unsigned Size;
+
+      /// KnownBits - The number of low bits in Offset that are known to be
+      /// exact.  The remaining bits of Offset are an upper bound.
+      uint8_t KnownBits;
+
+      /// Unalign - When non-zero, the block contains instructions (inline asm)
+      /// of unknown size.  The real size may be smaller than Size bytes by a
+      /// multiple of 1 << Unalign.
+      uint8_t Unalign;
+
+      /// PostAlign - When non-zero, the block terminator contains a .align
+      /// directive, so the end of the block is aligned to 1 << PostAlign
+      /// bytes.
+      uint8_t PostAlign;
+
+      BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+        PostAlign(0) {}
+
+      /// Compute the number of known offset bits internally to this block.
+      /// This number should be used to predict worst case padding when
+      /// splitting the block.
+      unsigned internalKnownBits() const {
+        return Unalign ? Unalign : KnownBits;
+      }
+
+      /// Compute the offset immediately following this block.  If LogAlign is
+      /// specified, return the offset the successor block will get if it has
+      /// this alignment.
+      unsigned postOffset(unsigned LogAlign = 0) const {
+        unsigned PO = Offset + Size;
+        unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+        if (!LA)
+          return PO;
+        // Add alignment padding from the terminator.
+        return WorstCaseAlign(PO, LA, internalKnownBits());
+      }
+
+      /// Compute the number of known low bits of postOffset.  If this block
+      /// contains inline asm, the number of known bits drops to the
+      /// instruction alignment.  An aligned terminator may increase the number
+      /// of known bits.
+      /// If LogAlign is given, also consider the alignment of the next block.
+      unsigned postKnownBits(unsigned LogAlign = 0) const {
+        return std::max(std::max(unsigned(PostAlign), LogAlign),
+                        internalKnownBits());
+      }
+    };
 
-    /// BBOffsets - the offset of each MBB in bytes, starting from 0.
-    /// The two-byte pads required for Thumb alignment are counted as part of
-    /// the following block.
-    std::vector<unsigned> BBOffsets;
+    std::vector<BasicBlockInfo> BBInfo;
 
     /// WaterList - A sorted list of basic blocks where islands could be placed
     /// (i.e. blocks that don't fall through to the following block, due
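With illustrative values, the per-block bookkeeping above works out as follows: a block with Offset = 0x100, Size = 6, PostAlign = 0 and internalKnownBits() == 1, queried for a 4-byte aligned successor, yields

    // postOffset(2)    == WorstCaseAlign(0x106, 2, 1) == 0x108
    // postKnownBits(2) == std::max(2u, 1u)            == 2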
@@ -162,9 +254,8 @@ namespace {
     /// the branch fix up pass.
     bool HasFarJump;
 
-    /// HasInlineAsm - True if the function contains inline assembly.
-    bool HasInlineAsm;
-
+    MachineFunction *MF;
+    MachineConstantPool *MCP;
     const ARMInstrInfo *TII;
     const ARMSubtarget *STI;
     ARMFunctionInfo *AFI;
@@ -182,67 +273,65 @@ namespace {
     }
 
   private:
-    void DoInitialPlacement(MachineFunction &MF,
-                            std::vector<MachineInstr*> &CPEMIs);
+    void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
     CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
-    void JumpTableFunctionScan(MachineFunction &MF);
-    void InitialFunctionScan(MachineFunction &MF,
-                             const std::vector<MachineInstr*> &CPEMIs);
+    unsigned getCPELogAlign(const MachineInstr *CPEMI);
+    void JumpTableFunctionScan();
+    void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs);
     MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
     void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
-    void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+    void AdjustBBOffsetsAfter(MachineBasicBlock *BB);
     bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
     int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
    bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
     void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
                         MachineBasicBlock *&NewMBB);
-    bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+    bool HandleConstantPoolUser(unsigned CPUserIndex);
     void RemoveDeadCPEMI(MachineInstr *CPEMI);
     bool RemoveUnusedCPEntries();
     bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
                       MachineInstr *CPEMI, unsigned Disp, bool NegOk,
                       bool DoDump = false);
     bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
-                        CPUser &U);
-    bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
-                         unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+                        CPUser &U, unsigned &Growth);
     bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
-    bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
-    bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
-    bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+    bool FixUpImmediateBr(ImmBranch &Br);
+    bool FixUpConditionalBr(ImmBranch &Br);
+    bool FixUpUnconditionalBr(ImmBranch &Br);
     bool UndoLRSpillRestore();
-    bool OptimizeThumb2Instructions(MachineFunction &MF);
-    bool OptimizeThumb2Branches(MachineFunction &MF);
-    bool ReorderThumb2JumpTables(MachineFunction &MF);
-    bool OptimizeThumb2JumpTables(MachineFunction &MF);
+    bool OptimizeThumb2Instructions();
+    bool OptimizeThumb2Branches();
+    bool ReorderThumb2JumpTables();
+    bool OptimizeThumb2JumpTables();
     MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
                                                   MachineBasicBlock *JTBB);
 
+    void ComputeBlockSize(MachineBasicBlock *MBB);
     unsigned GetOffsetOf(MachineInstr *MI) const;
     void dumpBBs();
-    void verify(MachineFunction &MF);
+    void verify();
+
+    bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+                         unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+    bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+                         const CPUser &U) {
+      return OffsetIsInRange(UserOffset, TrialOffset,
+                             U.MaxDisp, U.NegOk, U.IsSoImm);
+    }
   };
   char ARMConstantIslands::ID = 0;
 }
 
 /// verify - check BBOffsets, BBSizes, alignment of islands
-void ARMConstantIslands::verify(MachineFunction &MF) {
-  assert(BBOffsets.size() == BBSizes.size());
-  for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
-    assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
-  if (!isThumb)
-    return;
+void ARMConstantIslands::verify() {
 #ifndef NDEBUG
-  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
        MBBI != E; ++MBBI) {
     MachineBasicBlock *MBB = MBBI;
-    if (!MBB->empty() &&
-        MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
-      unsigned MBBId = MBB->getNumber();
-      assert(HasInlineAsm ||
-             (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
-             (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
-    }
+    unsigned Align = MBB->getAlignment();
+    unsigned MBBId = MBB->getNumber();
+    assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
+    assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
   }
   for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
     CPUser &U = CPUsers[i];
@@ -257,10 +346,16 @@ void ARMConstantIslands::verify(MachineFunction &MF) {
 
 /// print block size and offset information - debugging
 void ARMConstantIslands::dumpBBs() {
-  for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
-    DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
-                 << " size " << BBSizes[J] << "\n");
-  }
+  DEBUG({
+    for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+      const BasicBlockInfo &BBI = BBInfo[J];
+      dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+             << " kb=" << unsigned(BBI.KnownBits)
+             << " ua=" << unsigned(BBI.Unalign)
+             << " pa=" << unsigned(BBI.PostAlign)
+             << format(" size=%#x\n", BBInfo[J].Size);
+    }
+  });
 }
 
 /// createARMConstantIslandPass - returns an instance of the constpool
@@ -269,34 +364,38 @@ FunctionPass *llvm::createARMConstantIslandPass() {
   return new ARMConstantIslands();
 }
 
-bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
-  MachineConstantPool &MCP = *MF.getConstantPool();
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  MCP = mf.getConstantPool();
 
-  TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
-  AFI = MF.getInfo<ARMFunctionInfo>();
-  STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+  DEBUG(dbgs() << "***** ARMConstantIslands: "
+               << MCP->getConstants().size() << " CP entries, aligned to "
+               << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+  TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo();
+  AFI = MF->getInfo<ARMFunctionInfo>();
+  STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
 
   isThumb = AFI->isThumbFunction();
   isThumb1 = AFI->isThumb1OnlyFunction();
   isThumb2 = AFI->isThumb2Function();
 
   HasFarJump = false;
-  HasInlineAsm = false;
 
   // Renumber all of the machine basic blocks in the function, guaranteeing that
   // the numbers agree with the position of the block in the function.
-  MF.RenumberBlocks();
+  MF->RenumberBlocks();
 
   // Try to reorder and otherwise adjust the block layout to make good use
   // of the TB[BH] instructions.
   bool MadeChange = false;
   if (isThumb2 && AdjustJumpTableBlocks) {
-    JumpTableFunctionScan(MF);
-    MadeChange |= ReorderThumb2JumpTables(MF);
+    JumpTableFunctionScan();
+    MadeChange |= ReorderThumb2JumpTables();
     // Data is out of date, so clear it. It'll be re-computed later.
     T2JumpTables.clear();
     // Blocks may have shifted around. Keep the numbering up to date.
-    MF.RenumberBlocks();
+    MF->RenumberBlocks();
   }
 
   // Thumb1 functions containing constant pools get 4-byte alignment.
@@ -304,16 +403,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
 
   // ARM and Thumb2 functions need to be 4-byte aligned.
   if (!isThumb1)
-    MF.EnsureAlignment(2);  // 2 = log2(4)
+    MF->EnsureAlignment(2);  // 2 = log2(4)
 
   // Perform the initial placement of the constant pool entries.  To start with,
   // we put them all at the end of the function.
   std::vector<MachineInstr*> CPEMIs;
-  if (!MCP.isEmpty()) {
-    DoInitialPlacement(MF, CPEMIs);
-    if (isThumb1)
-      MF.EnsureAlignment(2);  // 2 = log2(4)
-  }
+  if (!MCP->isEmpty())
+    DoInitialPlacement(CPEMIs);
 
   /// The next UID to take is the first unused one.
   AFI->initPICLabelUId(CPEMIs.size());
@@ -321,7 +417,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
   // Do the initial scan of the function, building up information about the
   // sizes of each block, the location of all the water, and finding all of the
   // constant pool users.
-  InitialFunctionScan(MF, CPEMIs);
+  InitialFunctionScan(CPEMIs);
   CPEMIs.clear();
   DEBUG(dumpBBs());
 
@@ -333,9 +429,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
   // is no change.
   unsigned NoCPIters = 0, NoBRIters = 0;
   while (true) {
+    DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
     bool CPChange = false;
     for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
-      CPChange |= HandleConstantPoolUser(MF, i);
+      CPChange |= HandleConstantPoolUser(i);
     if (CPChange && ++NoCPIters > 30)
       llvm_unreachable("Constant Island pass failed to converge!");
     DEBUG(dumpBBs());
@@ -344,9 +441,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
     // appear as "new water" for the next iteration of constant pool placement.
     NewWaterList.clear();
 
+    DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
     bool BRChange = false;
     for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
-      BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+      BRChange |= FixUpImmediateBr(ImmBranches[i]);
     if (BRChange && ++NoBRIters > 30)
       llvm_unreachable("Branch Fix Up pass failed to converge!");
     DEBUG(dumpBBs());
@@ -358,10 +456,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
 
   // Shrink 32-bit Thumb2 branch, load, and store instructions.
   if (isThumb2 && !STI->prefers32BitThumb())
-    MadeChange |= OptimizeThumb2Instructions(MF);
+    MadeChange |= OptimizeThumb2Instructions();
 
   // After a while, this might be made debug-only, but it is not expensive.
-  verify(MF);
+  verify();
 
   // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
   // undo the spill / restore of LR if possible.
@@ -376,10 +474,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
     }
   }
 
-  DEBUG(errs() << '\n'; dumpBBs());
+  DEBUG(dbgs() << '\n'; dumpBBs());
 
-  BBSizes.clear();
-  BBOffsets.clear();
+  BBInfo.clear();
   WaterList.clear();
   CPUsers.clear();
   CPEntries.clear();
@@ -392,37 +489,65 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
 
 /// DoInitialPlacement - Perform the initial placement of the constant pool
 /// entries.  To start with, we put them all at the end of the function.
-void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
-                                        std::vector<MachineInstr*> &CPEMIs) {
+void
+ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
   // Create the basic block to hold the CPE's.
-  MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
-  MF.push_back(BB);
+  MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+  MF->push_back(BB);
+
+  // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+  unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+  // Mark the basic block as required by the const-pool.
+  // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+  BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+  // The function needs to be as aligned as the basic blocks. The linker may
+  // move functions around based on their alignment.
+  MF->EnsureAlignment(BB->getAlignment());
+
+  // Order the entries in BB by descending alignment.  That ensures correct
+  // alignment of all entries as long as BB is sufficiently aligned.  Keep
+  // track of the insertion point for each alignment.  We are going to bucket
+  // sort the entries as they are created.
+  SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
 
   // Add all of the constants from the constant pool to the end block, use an
   // identity mapping of CPI's to CPE's.
-  const std::vector<MachineConstantPoolEntry> &CPs =
-    MF.getConstantPool()->getConstants();
+  const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
 
-  const TargetData &TD = *MF.getTarget().getTargetData();
+  const TargetData &TD = *MF->getTarget().getTargetData();
   for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
     unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
-    // Verify that all constant pool entries are a multiple of 4 bytes.  If not,
-    // we would have to pad them out or something so that instructions stay
-    // aligned.
-    assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+    assert(Size >= 4 && "Too small constant pool entry");
+    unsigned Align = CPs[i].getAlignment();
+    assert(isPowerOf2_32(Align) && "Invalid alignment");
+    // Verify that all constant pool entries are a multiple of their alignment.
+    // If not, we would have to pad them out so that instructions stay aligned.
+    assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
+
+    // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+    unsigned LogAlign = Log2_32(Align);
+    MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
     MachineInstr *CPEMI =
-      BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+      BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
         .addImm(i).addConstantPoolIndex(i).addImm(Size);
     CPEMIs.push_back(CPEMI);
 
+    // Ensure that future entries with higher alignment get inserted before
+    // CPEMI. This is bucket sort with iterators.
+    for (unsigned a = LogAlign + 1; a < MaxAlign; ++a)
+      if (InsPoint[a] == InsAt)
+        InsPoint[a] = CPEMI;
+
     // Add a new CPEntry, but no corresponding CPUser yet.
     std::vector<CPEntry> CPEs;
     CPEs.push_back(CPEntry(CPEMI, i));
     CPEntries.push_back(CPEs);
     ++NumCPEs;
-    DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
-                 << "\n");
+    DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n");
   }
+  DEBUG(BB->dump());
 }
 
 /// BBHasFallthrough - Return true if the specified basic block can fallthrough
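The bucket sort above keeps the island's entries in descending alignment order, so every entry is naturally aligned as long as the island block itself is aligned to MaxAlign; InsPoint[a] always points at the position before which the next entry of alignment 2^a may go. Illustrative layout only:

    // Entries created in CPI order 0, 1, 2 with alignments 4, 16, 8
    // end up laid out as: CPI#1 (align 16), CPI#2 (align 8), CPI#0 (align 4)
    // so no inter-entry padding is ever required.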
@@ -458,17 +583,33 @@ ARMConstantIslands::CPEntry
   return NULL;
 }
 
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI.  Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+  assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+
+  // Everything is 4-byte aligned unless AlignConstantIslands is set.
+  if (!AlignConstantIslands)
+    return 2;
+
+  unsigned CPI = CPEMI->getOperand(1).getIndex();
+  assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+  unsigned Align = MCP->getConstants()[CPI].getAlignment();
+  assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+  return Log2_32(Align);
+}
+
 /// JumpTableFunctionScan - Do a scan of the function, building up
 /// information about the sizes of each block and the locations of all
 /// the jump tables.
-void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
-  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+void ARMConstantIslands::JumpTableFunctionScan() {
+  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
        MBBI != E; ++MBBI) {
     MachineBasicBlock &MBB = *MBBI;
 
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I)
-      if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+      if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
         T2JumpTables.push_back(I);
   }
 }
@@ -476,23 +617,27 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
 /// InitialFunctionScan - Do the initial scan of the function, building up
 /// information about the sizes of each block, the location of all the water,
 /// and finding all of the constant pool users.
-void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
-                                 const std::vector<MachineInstr*> &CPEMIs) {
-  // First thing, see if the function has any inline assembly in it. If so,
-  // we have to be conservative about alignment assumptions, as we don't
-  // know for sure the size of any instructions in the inline assembly.
-  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
-       MBBI != E; ++MBBI) {
-    MachineBasicBlock &MBB = *MBBI;
-    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-         I != E; ++I)
-      if (I->getOpcode() == ARM::INLINEASM)
-        HasInlineAsm = true;
-  }
+void ARMConstantIslands::
+InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) {
+  BBInfo.clear();
+  BBInfo.resize(MF->getNumBlockIDs());
+
+  // First thing, compute the size of all basic blocks, and see if the function
+  // has any inline assembly in it. If so, we have to be conservative about
+  // alignment assumptions, as we don't know for sure the size of any
+  // instructions in the inline assembly.
+  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+    ComputeBlockSize(I);
+
+  // The known bits of the entry block offset are determined by the function
+  // alignment.
+  BBInfo.front().KnownBits = MF->getAlignment();
+
+  // Compute block offsets and known bits.
+  AdjustBBOffsetsAfter(MF->begin());
 
   // Now go back through the instructions and build up our data structures.
-  unsigned Offset = 0;
-  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
        MBBI != E; ++MBBI) {
     MachineBasicBlock &MBB = *MBBI;
 
@@ -501,16 +646,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
     if (!BBHasFallthrough(&MBB))
       WaterList.push_back(&MBB);
 
-    unsigned MBBSize = 0;
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
       if (I->isDebugValue())
         continue;
-      // Add instruction size to MBBSize.
-      MBBSize += TII->GetInstSizeInBytes(I);
 
       int Opc = I->getOpcode();
-      if (I->getDesc().isBranch()) {
+      if (I->isBranch()) {
         bool isCond = false;
         unsigned Bits = 0;
         unsigned Scale = 1;
@@ -518,18 +660,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
         switch (Opc) {
         default:
           continue;  // Ignore other JT branches
-        case ARM::tBR_JTr:
-          // A Thumb1 table jump may involve padding; for the offsets to
-          // be right, functions containing these must be 4-byte aligned.
-          // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
-          // table entries. So this code checks whether offset of tBR_JTr + 2
-          // is aligned.  That is held in Offset+MBBSize, which already has
-          // 2 added in for the size of the mov pc instruction.
-          MF.EnsureAlignment(2U);
-          if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
-            // FIXME: Add a pseudo ALIGN instruction instead.
-            MBBSize += 2;           // padding
-          continue;   // Does not get an entry in ImmBranches
         case ARM::t2BR_JT:
           T2JumpTables.push_back(I);
           continue;   // Does not get an entry in ImmBranches
@@ -647,18 +777,30 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
           break;
         }
       }
+    }
+  }
+}
 
-    // In thumb mode, if this block is a constpool island, we may need padding
-    // so it's aligned on 4 byte boundary.
-    if (isThumb &&
-        !MBB.empty() &&
-        MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
-        ((Offset%4) != 0 || HasInlineAsm))
-      MBBSize += 2;
-
-    BBSizes.push_back(MBBSize);
-    BBOffsets.push_back(Offset);
-    Offset += MBBSize;
+/// ComputeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) {
+  BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+  BBI.Size = 0;
+  BBI.Unalign = 0;
+  BBI.PostAlign = 0;
+
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+       ++I) {
+    BBI.Size += TII->GetInstSizeInBytes(I);
+    // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+    // The actual size may be smaller, but still a multiple of the instr size.
+    if (I->isInlineAsm())
+      BBI.Unalign = isThumb ? 1 : 2;
+  }
+
+  // tBR_JTr contains a .align 2 directive.
+  if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+    BBI.PostAlign = 2;
+    MBB->getParent()->EnsureAlignment(2);
+  }
+}
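In ComputeBlockSize() above, inline assembly makes Size an upper bound only; Unalign records the instruction granularity (1 = 2-byte units on Thumb, 2 = 4-byte units on ARM), so the later padding math via internalKnownBits() trusts only that many low offset bits. Illustrative values:

    // A Thumb block containing inline asm: BBI.Size = 22 (upper bound),
    // BBI.Unalign = 1  =>  internalKnownBits() == 1: only bit 0 is exact.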
@@ -671,14 +813,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
   // The offset is composed of two things: the sum of the sizes of all MBB's
   // before this instruction's block, and the offset from the start of the block
   // it is in.
-  unsigned Offset = BBOffsets[MBB->getNumber()];
-
-  // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
-  // alignment padding, and compensate if so.
-  if (isThumb &&
-      MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
-      (Offset%4 != 0 || HasInlineAsm))
-    Offset += 2;
+  unsigned Offset = BBInfo[MBB->getNumber()].Offset;
 
   // Sum instructions before MI in MBB.
   for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
@@ -702,12 +837,9 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
   // Renumber the MBB's to keep them consecutive.
   NewBB->getParent()->RenumberBlocks(NewBB);
 
-  // Insert a size into BBSizes to align it properly with the (newly
+  // Insert an entry into BBInfo to align it properly with the (newly
   // renumbered) block numbers.
-  BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
-  // Likewise for BBOffsets.
-  BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
 
   // Next, update WaterList. Specifically, we need to add NewMBB as having
   // available water after it.
@@ -723,13 +855,12 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
 /// account for this change and returns the newly created block.
 MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
   MachineBasicBlock *OrigBB = MI->getParent();
-  MachineFunction &MF = *OrigBB->getParent();
 
   // Create a new MBB for the code after the OrigBB.
   MachineBasicBlock *NewBB =
-    MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+    MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
   MachineFunction::iterator MBBI = OrigBB; ++MBBI;
-  MF.insert(MBBI, NewBB);
+  MF->insert(MBBI, NewBB);
 
   // Splice the instructions starting with MI over to NewBB.
   NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -747,16 +878,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
   ++NumSplit;
 
   // Update the CFG.  All succs of OrigBB are now succs of NewBB.
-  while (!OrigBB->succ_empty()) {
-    MachineBasicBlock *Succ = *OrigBB->succ_begin();
-    OrigBB->removeSuccessor(Succ);
-    NewBB->addSuccessor(Succ);
-
-    // This pass should be run after register allocation, so there should be no
-    // PHI nodes to update.
-    assert((Succ->empty() || !Succ->begin()->isPHI())
-           && "PHI nodes should be eliminated by now!");
-  }
+  NewBB->transferSuccessors(OrigBB);
 
   // OrigBB branches to NewBB.
   OrigBB->addSuccessor(NewBB);
@@ -764,14 +886,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
   // Update internal data structures to account for the newly inserted MBB.
   // This is almost the same as UpdateForInsertedWaterBlock, except that
   // the Water goes after OrigBB, not NewBB.
-  MF.RenumberBlocks(NewBB);
+  MF->RenumberBlocks(NewBB);
 
-  // Insert a size into BBSizes to align it properly with the (newly
+  // Insert an entry into BBInfo to align it properly with the (newly
   // renumbered) block numbers.
-  BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
-  // Likewise for BBOffsets.
-  BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
 
   // Next, update WaterList. Specifically, we need to add OrigMBB as having
   // available water after it (but not if it's already there, which happens
@@ -787,54 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
     WaterList.insert(IP, OrigBB);
   NewWaterList.insert(OrigBB);
 
-  unsigned OrigBBI = OrigBB->getNumber();
-  unsigned NewBBI = NewBB->getNumber();
-
-  int delta = isThumb1 ? 2 : 4;
-
   // Figure out how large the OrigBB is.  As the first half of the original
   // block, it cannot contain a tablejump.  The size includes
  // the new jump we added.  (It should be possible to do this without
   // recounting everything, but it's very confusing, and this is rarely
   // executed.)
-  unsigned OrigBBSize = 0;
-  for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
-       I != E; ++I)
-    OrigBBSize += TII->GetInstSizeInBytes(I);
-  BBSizes[OrigBBI] = OrigBBSize;
-
-  // ...and adjust BBOffsets for NewBB accordingly.
-  BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+  ComputeBlockSize(OrigBB);
 
   // Figure out how large the NewMBB is.  As the second half of the original
   // block, it may contain a tablejump.
-  unsigned NewBBSize = 0;
-  for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
-       I != E; ++I)
-    NewBBSize += TII->GetInstSizeInBytes(I);
-  // Set the size of NewBB in BBSizes.  It does not include any padding now.
-  BBSizes[NewBBI] = NewBBSize;
-
-  MachineInstr* ThumbJTMI = prior(NewBB->end());
-  if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
-    // We've added another 2-byte instruction before this tablejump, which
-    // means we will always need padding if we didn't before, and vice versa.
-
-    // The original offset of the jump instruction was:
-    unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
-    if (OrigOffset%4 == 0) {
-      // We had padding before and now we don't.  No net change in code size.
-      delta = 0;
-    } else {
-      // We didn't have padding before and now we do.
-      BBSizes[NewBBI] += 2;
-      delta = 4;
-    }
-  }
+  ComputeBlockSize(NewBB);
 
   // All BBOffsets following these blocks must be modified.
-  if (delta)
-    AdjustBBOffsetsAfter(NewBB, delta);
+  AdjustBBOffsetsAfter(OrigBB);
 
   return NewBB;
 }
@@ -882,19 +966,44 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
 
 /// WaterIsInRange - Returns true if a CPE placed after the specified
 /// Water (a basic block) will be in range for the specific MI.
-
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
 bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
-                                        MachineBasicBlock* Water, CPUser &U) {
-  unsigned MaxDisp = U.MaxDisp;
-  unsigned CPEOffset = BBOffsets[Water->getNumber()] +
-                       BBSizes[Water->getNumber()];
-
-  // If the CPE is to be inserted before the instruction, that will raise
-  // the offset of the instruction.
-  if (CPEOffset < UserOffset)
-    UserOffset += U.CPEMI->getOperand(2).getImm();
-
-  return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+                                        MachineBasicBlock* Water, CPUser &U,
+                                        unsigned &Growth) {
+  unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+  unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+  unsigned NextBlockOffset, NextBlockAlignment;
+  MachineFunction::const_iterator NextBlock = Water;
+  if (++NextBlock == MF->end()) {
+    NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+    NextBlockAlignment = 0;
+  } else {
+    NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+    NextBlockAlignment = NextBlock->getAlignment();
+  }
+  unsigned Size = U.CPEMI->getOperand(2).getImm();
+  unsigned CPEEnd = CPEOffset + Size;
+
+  // The CPE may be able to hide in the alignment padding before the next
+  // block. It may also cause more padding to be required if it is more aligned
+  // than the next block.
+  if (CPEEnd > NextBlockOffset) {
+    Growth = CPEEnd - NextBlockOffset;
+    // Compute the padding that would go at the end of the CPE to align the next
+    // block.
+    Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+    // If the CPE is to be inserted before the instruction, that will raise
+    // the offset of the instruction.  Also account for unknown alignment padding
+    // in blocks between CPE and the user.
+    if (CPEOffset < UserOffset)
+      UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+  } else
+    // CPE fits in existing padding.
+    Growth = 0;
+
+  return OffsetIsInRange(UserOffset, CPEOffset, U);
 }
 
 /// CPEIsInRange - Returns true if the distance between specific MI and
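Growth in WaterIsInRange() above is the net function growth from placing the CPE after Water, letting the entry hide in padding that already precedes the next block. Illustrative values:

    // postOffset(CPELogAlign) == 0x120, Size == 8  =>  CPEEnd == 0x128
    // NextBlockOffset == 0x124, NextBlockAlignment == 2 (4 bytes):
    //   Growth = (0x128 - 0x124) + OffsetToAlignment(0x128, 4) == 4 + 0
    // Had CPEEnd been <= 0x124, the CPE would fit entirely in existing
    // padding and Growth would be 0.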
+ // Include the alignment of the current block. + unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); + unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); + unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + + // This is where block i begins. + BBInfo[i].Offset = Offset; + BBInfo[i].KnownBits = KnownBits; } } @@ -1016,7 +1093,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Check to see if the CPE is already in-range. if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) { - DEBUG(errs() << "In range\n"); + DEBUG(dbgs() << "In range\n"); return 1; } @@ -1031,7 +1108,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) if (CPEs[i].CPEMI == NULL) continue; if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) { - DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#" + DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); // Point the CPUser node to the replacement U.CPEMI = CPEs[i].CPEMI; @@ -1079,10 +1156,9 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, if (WaterList.empty()) return false; - bool FoundWaterThatWouldPad = false; - water_iterator IPThatWouldPad; - for (water_iterator IP = prior(WaterList.end()), - B = WaterList.begin();; --IP) { + unsigned BestGrowth = ~0u; + for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; + --IP) { MachineBasicBlock* WaterBB = *IP; // Check if water is in range and is either at a lower address than the // current "high water mark" or a new water block that was created since @@ -1092,31 +1168,24 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, // should be relatively uncommon and when it does happen, we want to be // sure to take advantage of it for all the CPEs near that block, so that // we don't insert more branches than necessary. - if (WaterIsInRange(UserOffset, WaterBB, U) && + unsigned Growth; + if (WaterIsInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || - NewWaterList.count(WaterBB))) { - unsigned WBBId = WaterBB->getNumber(); - if (isThumb && - (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) { - // This is valid Water, but would introduce padding. Remember - // it in case we don't find any Water that doesn't do this. - if (!FoundWaterThatWouldPad) { - FoundWaterThatWouldPad = true; - IPThatWouldPad = IP; - } - } else { - WaterIter = IP; + NewWaterList.count(WaterBB)) && Growth < BestGrowth) { + // This is the least amount of required padding seen so far. + BestGrowth = Growth; + WaterIter = IP; + DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() + << " Growth=" << Growth << '\n'); + + // Keep looking unless it is perfect. 
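+ // Zero growth means the CPE slips entirely into padding that is already
+ // there, so no later offsets move and the scan can stop.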
+ if (BestGrowth == 0) return true; - } } if (IP == B) break; } - if (FoundWaterThatWouldPad) { - WaterIter = IPThatWouldPad; - return true; - } - return false; + return BestGrowth != ~0u; } /// CreateNewWater - No existing WaterList entry will work for @@ -1132,114 +1201,143 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; + unsigned CPELogAlign = getCPELogAlign(CPEMI); MachineBasicBlock *UserMBB = UserMI->getParent(); - unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] + - BBSizes[UserMBB->getNumber()]; - assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]); + const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; // If the block does not end in an unconditional branch already, and if the // end of the block is within range, make new water there. (The addition // below is for the unconditional branch we will be adding: 4 bytes on ARM + // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for // inside OffsetIsInRange. - if (BBHasFallthrough(UserMBB) && - OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4), - U.MaxDisp, U.NegOk, U.IsSoImm)) { - DEBUG(errs() << "Split at end of block\n"); - if (&UserMBB->back() == UserMI) - assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!"); - NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); - // Add an unconditional branch from UserMBB to fallthrough block. - // Record it for branch lengthening; this new branch will not get out of - // range, but if the preceding conditional branch is out of range, the - // targets will be exchanged, and the altered branch may be out of - // range, so the machinery has to know about it. - int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; - if (!isThumb) - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); - else - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) - .addImm(ARMCC::AL).addReg(0); - unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); - ImmBranches.push_back(ImmBranch(&UserMBB->back(), - MaxDisp, false, UncondBr)); - int delta = isThumb1 ? 2 : 4; - BBSizes[UserMBB->getNumber()] += delta; - AdjustBBOffsetsAfter(UserMBB, delta); - } else { - // What a big block. Find a place within the block to split it. - // This is a little tricky on Thumb1 since instructions are 2 bytes - // and constant pool entries are 4 bytes: if instruction I references - // island CPE, and instruction I+1 references CPE', it will - // not work well to put CPE as far forward as possible, since then - // CPE' cannot immediately follow it (that location is 2 bytes - // farther away from I+1 than CPE was from I) and we'd need to create - // a new island. So, we make a first guess, then walk through the - // instructions between the one currently being looked at and the - // possible insertion point, and make sure any other instructions - // that reference CPEs will be able to use the same island area; - // if not, we back up the insertion point. - - // The 4 in the following is for the unconditional branch we'll be - // inserting (allows for long branch on Thumb1). Alignment of the - // island is handled inside OffsetIsInRange. - unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4; - // This could point off the end of the block if we've already got - // constant pool entries following this block; only the last one is - // in the water list. 
Back past any possible branches (allow for a - // conditional and a maximally long unconditional). - if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1]) - BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] - - (isThumb1 ? 6 : 8); - unsigned EndInsertOffset = BaseInsertOffset + - CPEMI->getOperand(2).getImm(); - MachineBasicBlock::iterator MI = UserMI; - ++MI; - unsigned CPUIndex = CPUserIndex+1; - unsigned NumCPUsers = CPUsers.size(); - MachineInstr *LastIT = 0; - for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); - Offset < BaseInsertOffset; - Offset += TII->GetInstSizeInBytes(MI), - MI = llvm::next(MI)) { - if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { - CPUser &U = CPUsers[CPUIndex]; - if (!OffsetIsInRange(Offset, EndInsertOffset, - U.MaxDisp, U.NegOk, U.IsSoImm)) { - BaseInsertOffset -= (isThumb1 ? 2 : 4); - EndInsertOffset -= (isThumb1 ? 2 : 4); - } - // This is overly conservative, as we don't account for CPEMIs - // being reused within the block, but it doesn't matter much. - EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm(); - CPUIndex++; - } + if (BBHasFallthrough(UserMBB)) { + // Size of branch to insert. + unsigned Delta = isThumb1 ? 2 : 4; + // End of UserBlock after adding a branch. + unsigned UserBlockEnd = UserBBI.postOffset() + Delta; + // Compute the offset where the CPE will begin. + unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign, + UserBBI.postKnownBits()); + + if (OffsetIsInRange(UserOffset, CPEOffset, U)) { + DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() + << format(", expected CPE offset %#x\n", CPEOffset)); + NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); + // Add an unconditional branch from UserMBB to fallthrough block. Record + // it for branch lengthening; this new branch will not get out of range, + // but if the preceding conditional branch is out of range, the targets + // will be exchanged, and the altered branch may be out of range, so the + // machinery has to know about it. + int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; + if (!isThumb) + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); + else + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) + .addImm(ARMCC::AL).addReg(0); + unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); + ImmBranches.push_back(ImmBranch(&UserMBB->back(), + MaxDisp, false, UncondBr)); + BBInfo[UserMBB->getNumber()].Size += Delta; + AdjustBBOffsetsAfter(UserMBB); + return; + } + } - // Remember the last IT instruction. - if (MI->getOpcode() == ARM::t2IT) - LastIT = MI; + // What a big block. Find a place within the block to split it. This is a + // little tricky on Thumb1 since instructions are 2 bytes and constant pool + // entries are 4 bytes: if instruction I references island CPE, and + // instruction I+1 references CPE', it will not work well to put CPE as far + // forward as possible, since then CPE' cannot immediately follow it (that + // location is 2 bytes farther away from I+1 than CPE was from I) and we'd + // need to create a new island. So, we make a first guess, then walk through + // the instructions between the one currently being looked at and the + // possible insertion point, and make sure any other instructions that + // reference CPEs will be able to use the same island area; if not, we back + // up the insertion point. + + // Try to split the block so it's fully aligned. 
Compute the latest split + // point where we can add a 4-byte branch instruction, and then + // WorstCaseAlign to LogAlign. + unsigned LogAlign = MF->getAlignment(); + assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + unsigned KnownBits = UserBBI.internalKnownBits(); + unsigned UPad = UnknownPadding(LogAlign, KnownBits); + unsigned BaseInsertOffset = UserOffset + U.MaxDisp; + DEBUG(dbgs() << format("Split in middle of big block before %#x", + BaseInsertOffset)); + + // Account for alignment and unknown padding. + BaseInsertOffset &= ~((1u << LogAlign) - 1); + BaseInsertOffset -= UPad; + + // The 4 in the following is for the unconditional branch we'll be inserting + // (allows for long branch on Thumb1). Alignment of the island is handled + // inside OffsetIsInRange. + BaseInsertOffset -= 4; + + DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) + << " la=" << LogAlign + << " kb=" << KnownBits + << " up=" << UPad << '\n'); + + // This could point off the end of the block if we've already got constant + // pool entries following this block; only the last one is in the water list. + // Back past any possible branches (allow for a conditional and a maximally + // long unconditional). + if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset) + BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset - + (isThumb1 ? 6 : 8); + unsigned EndInsertOffset = + WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) + + CPEMI->getOperand(2).getImm(); + MachineBasicBlock::iterator MI = UserMI; + ++MI; + unsigned CPUIndex = CPUserIndex+1; + unsigned NumCPUsers = CPUsers.size(); + MachineInstr *LastIT = 0; + for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); + Offset < BaseInsertOffset; + Offset += TII->GetInstSizeInBytes(MI), + MI = llvm::next(MI)) { + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { + CPUser &U = CPUsers[CPUIndex]; + if (!OffsetIsInRange(Offset, EndInsertOffset, U)) { + // Shift insertion point by one unit of alignment so it is within reach. + BaseInsertOffset -= 1u << LogAlign; + EndInsertOffset -= 1u << LogAlign; + } + // This is overly conservative, as we don't account for CPEMIs being + // reused within the block, but it doesn't matter much. Also assume CPEs + // are added in order with alignment padding. We may eventually be able + // to pack the aligned CPEs better. + EndInsertOffset = RoundUpToAlignment(EndInsertOffset, + 1u << getCPELogAlign(U.CPEMI)) + + U.CPEMI->getOperand(2).getImm(); + CPUIndex++; } - DEBUG(errs() << "Split in middle of big block\n"); - --MI; + // Remember the last IT instruction. + if (MI->getOpcode() == ARM::t2IT) + LastIT = MI; + } - // Avoid splitting an IT block. - if (LastIT) { - unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); - if (CC != ARMCC::AL) - MI = LastIT; - } - NewMBB = SplitBlockBeforeInstr(MI); + --MI; + + // Avoid splitting an IT block. + if (LastIT) { + unsigned PredReg = 0; + ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + if (CC != ARMCC::AL) + MI = LastIT; + } + NewMBB = SplitBlockBeforeInstr(MI); } /// HandleConstantPoolUser - Analyze the specified user, checking to see if it /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise.
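To make the split-point arithmetic in CreateNewWater above concrete, here is a minimal compilable sketch of the same computation; the helper names are illustrative stand-ins, not the exact LLVM helpers, and the 4-byte allowance matches the long-branch assumption stated in the comments:

// Worst-case padding needed to reach 1 << LogAlign when only the low
// KnownBits bits of the offset are certain (same shape as UnknownPadding).
static unsigned unknownPaddingSketch(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  return 0;
}

// Latest offset at which the big block can be split so that a CPE placed
// there is still reachable from the user at UserOffset.
static unsigned latestSplitPointSketch(unsigned UserOffset, unsigned MaxDisp,
                                       unsigned LogAlign, unsigned KnownBits) {
  unsigned Base = UserOffset + MaxDisp;              // farthest reachable byte
  Base &= ~((1u << LogAlign) - 1);                   // round down to alignment
  Base -= unknownPaddingSketch(LogAlign, KnownBits); // worst-case hidden padding
  return Base - 4;                                   // room for the long branch
}

For example, a user at 0x100 with a 1KB range, 4-byte function alignment, and one known low bit gives latestSplitPointSketch(0x100, 0x400, 2, 1) == 0x4fa: 0x500 is already 4-byte aligned, minus 2 bytes of possible padding, minus 4 bytes for the unconditional branch.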
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, - unsigned CPUserIndex) { +bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; @@ -1260,11 +1358,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, unsigned ID = AFI->createPICLabelUId(); // Look for water where we can place this CPE. - MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); + MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); MachineBasicBlock *NewMBB; water_iterator IP; if (LookForWater(U, UserOffset, IP)) { - DEBUG(errs() << "found water in range\n"); + DEBUG(dbgs() << "Found water in range\n"); MachineBasicBlock *WaterBB = *IP; // If the original WaterList entry was "new water" on this iteration, @@ -1279,7 +1377,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, } else { // No water found. - DEBUG(errs() << "No water found\n"); + DEBUG(dbgs() << "No water found\n"); CreateNewWater(CPUserIndex, UserOffset, NewMBB); // SplitBlockBeforeInstr adds to WaterList, which is important when it is @@ -1304,7 +1402,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, WaterList.erase(IP); // Okay, we know we can put an island before NewMBB now, do it! - MF.insert(NewMBB, NewIsland); + MF->insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. UpdateForInsertedWaterBlock(NewIsland); @@ -1320,13 +1418,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); ++NumCPEs; - BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; - // Compensate for .align 2 in thumb mode. - if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm)) - Size += 2; + // Mark the basic block as aligned as required by the const-pool entry. + NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); + // Increase the size of the island block to account for the new entry. - BBSizes[NewIsland->getNumber()] += Size; - AdjustBBOffsetsAfter(NewIsland, Size); + BBInfo[NewIsland->getNumber()].Size += Size; + AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1335,8 +1432,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, break; } - DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI - << '\t' << *UserMI); + DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI + << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset)); return true; } @@ -1347,19 +1444,18 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { MachineBasicBlock *CPEBB = CPEMI->getParent(); unsigned Size = CPEMI->getOperand(2).getImm(); CPEMI->eraseFromParent(); - BBSizes[CPEBB->getNumber()] -= Size; + BBInfo[CPEBB->getNumber()].Size -= Size; // All succeeding offsets have the current size value added in, fix this. if (CPEBB->empty()) { - // In thumb1 mode, the size of island may be padded by two to compensate for - // the alignment requirement. Then it will now be 2 when the block is - // empty, so fix this. - // All succeeding offsets have the current size value added in, fix this. 
- if (BBSizes[CPEBB->getNumber()] != 0) { - Size += BBSizes[CPEBB->getNumber()]; - BBSizes[CPEBB->getNumber()] = 0; - } - } - AdjustBBOffsetsAfter(CPEBB, -Size); + BBInfo[CPEBB->getNumber()].Size = 0; + + // This block no longer needs to be aligned. <rdar://problem/10534709>. + CPEBB->setAlignment(0); + } else + // Entries are sorted by descending alignment, so realign from the front. + CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); + + AdjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if // this BB's predecessor jumps directly to this BB's successor. This // shouldn't happen currently. @@ -1390,9 +1486,9 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned MaxDisp) { unsigned PCAdj = isThumb ? 4 : 8; unsigned BrOffset = GetOffsetOf(MI) + PCAdj; - unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; - DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber() + DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() << " from BB#" << MI->getParent()->getNumber() << " max delta=" << MaxDisp << " from " << GetOffsetOf(MI) << " to " << DestOffset @@ -1411,7 +1507,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, /// FixUpImmediateBr - Fix up an immediate branch whose destination is too far /// away to fit in its displacement field. -bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { +bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1420,8 +1516,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { return false; if (!Br.isCond) - return FixUpUnconditionalBr(MF, Br); - return FixUpConditionalBr(MF, Br); + return FixUpUnconditionalBr(Br); + return FixUpConditionalBr(Br); } /// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is @@ -1429,7 +1525,7 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { /// spilled in the epilogue, then we can use BL to implement a far jump. /// Otherwise, add an intermediate branch instruction to a branch. bool -ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { +ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); if (!isThumb1) @@ -1438,12 +1534,12 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { // Use BL to implement far jump. Br.MaxDisp = (1 << 21) * 2; MI->setDesc(TII->get(ARM::tBfar)); - BBSizes[MBB->getNumber()] += 2; - AdjustBBOffsetsAfter(MBB, 2); + BBInfo[MBB->getNumber()].Size += 2; + AdjustBBOffsetsAfter(MBB); HasFarJump = true; ++NumUBrFixed; - DEBUG(errs() << " Changed B to long jump " << *MI); + DEBUG(dbgs() << " Changed B to long jump " << *MI); return true; } @@ -1452,7 +1548,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { /// far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. 
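A minimal compilable sketch of the displacement test BBIsInRange applies above (MaxDisp is treated as a symmetric byte limit, and the +8/+4 is the pipeline read-ahead the hunk adds as PCAdj); names are illustrative:

static bool branchInRangeSketch(unsigned BrInsnOffset, unsigned DestOffset,
                                unsigned MaxDisp, bool IsThumb) {
  unsigned PC = BrInsnOffset + (IsThumb ? 4 : 8); // PC reads ahead of the branch
  return DestOffset >= PC ? DestOffset - PC <= MaxDisp
                          : PC - DestOffset <= MaxDisp;
}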
bool -ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { +ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1487,7 +1583,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { // b L1 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { - DEBUG(errs() << " Invert Bcc condition and swap its destination with " + DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " << *BMI); BMI->getOperand(0).setMBB(DestBB); MI->getOperand(0).setMBB(NewDest); @@ -1502,15 +1598,13 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { // No need for the branch to the next block. We're adding an unconditional // branch to the destination. int delta = TII->GetInstSizeInBytes(&MBB->back()); - BBSizes[MBB->getNumber()] -= delta; - MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB)); - AdjustBBOffsetsAfter(SplitBB, -delta); + BBInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); - // BBOffsets[SplitBB] is wrong temporarily, fixed below + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); - DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber() + DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. to BB#" << NextBB->getNumber() << "\n"); @@ -1519,23 +1613,20 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) .addMBB(NextBB).addImm(CC).addReg(CCReg); Br.MI = &MBB->back(); - BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); if (isThumb) BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) .addImm(ARMCC::AL).addReg(0); else BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); - BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); // Remove the old conditional branch. It may or may not still be in MBB. - BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI); + BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); MI->eraseFromParent(); - - // The net size change is an addition of one unconditional branch. - int delta = TII->GetInstSizeInBytes(&MBB->back()); - AdjustBBOffsetsAfter(MBB, delta); + AdjustBBOffsetsAfter(MBB); return true; } @@ -1561,7 +1652,7 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { +bool ARMConstantIslands::OptimizeThumb2Instructions() { bool MadeChange = false; // Shrink ADR and LDR from constantpool. 
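// Each shrink below trades a 32-bit Thumb2 encoding for a 16-bit one, so
// the block loses 2 bytes and all later offsets must be recomputed before
// the next candidate is tested.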
@@ -1598,19 +1689,19 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { U.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = U.MI->getParent(); - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + AdjustBBOffsetsAfter(MBB); ++NumT2CPShrunk; MadeChange = true; } } - MadeChange |= OptimizeThumb2Branches(MF); - MadeChange |= OptimizeThumb2JumpTables(MF); + MadeChange |= OptimizeThumb2Branches(); + MadeChange |= OptimizeThumb2JumpTables(); return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { +bool ARMConstantIslands::OptimizeThumb2Branches() { bool MadeChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { @@ -1639,8 +1730,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { Br.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = Br.MI->getParent(); - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + AdjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; MadeChange = true; } @@ -1663,7 +1754,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; - unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { MachineBasicBlock::iterator CmpMI = Br.MI; if (CmpMI != Br.MI->getParent()->begin()) { @@ -1681,8 +1772,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { CmpMI->eraseFromParent(); Br.MI->eraseFromParent(); Br.MI = NewBR; - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + AdjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; } @@ -1696,12 +1787,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { /// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. -bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { +bool ARMConstantIslands::OptimizeThumb2JumpTables() { bool MadeChange = false; // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? - MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -1709,7 +1800,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1720,7 +1811,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { MachineBasicBlock *MBB = JTBBs[j]; - unsigned DstOffset = BBOffsets[MBB->getNumber()]; + unsigned DstOffset = BBInfo[MBB->getNumber()].Offset; // Negative offset is not ok. 
FIXME: We should change BB layout to make // sure all the branches are forward. if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2) @@ -1808,8 +1899,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { MI->eraseFromParent(); int delta = OrigSize - NewSize; - BBSizes[MBB->getNumber()] -= delta; - AdjustBBOffsetsAfter(MBB, -delta); + BBInfo[MBB->getNumber()].Size -= delta; + AdjustBBOffsetsAfter(MBB); ++NumTBs; MadeChange = true; @@ -1821,10 +1912,10 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { /// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that /// jump tables always branch forwards, since that's what tbb and tbh need. -bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { +bool ARMConstantIslands::ReorderThumb2JumpTables() { bool MadeChange = false; - MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -1832,7 +1923,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1864,8 +1955,6 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineBasicBlock *ARMConstantIslands:: AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { - MachineFunction &MF = *BB->getParent(); - // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple @@ -1882,22 +1971,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) // If the block ends in an unconditional branch, move it. The prior block // has to have an analyzable terminator for us to move this one. Be paranoid // and make sure we're not trying to move the entry block of the function. - if (!B && Cond.empty() && BB != MF.begin() && + if (!B && Cond.empty() && BB != MF->begin() && !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { BB->moveAfter(JTBB); OldPrior->updateTerminator(); BB->updateTerminator(); // Update numbering to account for the block being moved. - MF.RenumberBlocks(); + MF->RenumberBlocks(); ++NumJTMoved; return NULL; } // Create a new MBB for the code after the jump BB. MachineBasicBlock *NewBB = - MF.CreateMachineBasicBlock(JTBB->getBasicBlock()); + MF->CreateMachineBasicBlock(JTBB->getBasicBlock()); MachineFunction::iterator MBBI = JTBB; ++MBBI; - MF.insert(MBBI, NewBB); + MF->insert(MBBI, NewBB); // Add an unconditional branch from NewBB to BB. // There doesn't seem to be meaningful DebugInfo available; this doesn't @@ -1907,7 +1996,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) .addImm(ARMCC::AL).addReg(0); // Update internal data structures to account for the newly inserted MBB. - MF.RenumberBlocks(NewBB); + MF->RenumberBlocks(NewBB); // Update the CFG. 
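// (NewBB acts as a trampoline placed right after the jump table: the table
// entry can target NewBB, which immediately branches on to the real
// destination BB.)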
NewBB->addSuccessor(BB); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index fc464ea..01d772d 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -61,7 +61,7 @@ namespace { void ExpandVST(MachineBasicBlock::iterator &MBBI); void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); void ExpandVTBL(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool IsExt, unsigned NumRegs); + unsigned Opc, bool IsExt); void ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); }; @@ -129,12 +129,15 @@ namespace { } static const NEONLdStTableEntry NEONLdStTable[] = { -{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,true}, -{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, true, SingleSpc, 2, 4,true}, -{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,true}, -{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, true, SingleSpc, 2, 2,true}, -{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,true}, -{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, true, SingleSpc, 2, 8,true}, +{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false}, +{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false}, +{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false}, +{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false}, +{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, +{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false}, +{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false}, +{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, +{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false}, { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, @@ -177,18 +180,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, { ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, { ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, { ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, true, SingleSpc, 2, 8 ,false}, +{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false}, { ARM::VLD2q16Pseudo, 
ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, -{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, -{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false}, { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false}, -{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, true, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false}, { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, @@ -267,10 +276,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, { ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, -{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,true}, -{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, true, SingleSpc, 4, 1 ,true}, -{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true}, -{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true}, +{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false}, +{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, +{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, { ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false}, { ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, @@ -296,19 +307,25 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, -{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,true}, -{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, -{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,true}, -{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, -{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,true}, -{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, - -{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,true}, -{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, -{ 
ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,true}, -{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, -{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,true}, -{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, +{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false}, +{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, + +{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, @@ -620,7 +637,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ /// register operands to real instructions with D register operands. void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool IsExt, unsigned NumRegs) { + unsigned Opc, bool IsExt) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); @@ -636,11 +653,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0).addReg(D1); - if (NumRegs > 2) - MIB.addReg(D2); - if (NumRegs > 3) - MIB.addReg(D3); + MIB.addReg(D0); // Copy the other source register operand. 
MIB.addOperand(MI.getOperand(OpIdx++)); @@ -1090,12 +1103,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8Pseudo_UPD: - case ARM::VLD2d16Pseudo_UPD: - case ARM::VLD2d32Pseudo_UPD: - case ARM::VLD2q8Pseudo_UPD: - case ARM::VLD2q16Pseudo_UPD: - case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD2d8PseudoWB_fixed: + case ARM::VLD2d16PseudoWB_fixed: + case ARM::VLD2d32PseudoWB_fixed: + case ARM::VLD2q8PseudoWB_fixed: + case ARM::VLD2q16PseudoWB_fixed: + case ARM::VLD2q32PseudoWB_fixed: + case ARM::VLD2d8PseudoWB_register: + case ARM::VLD2d16PseudoWB_register: + case ARM::VLD2d32PseudoWB_register: + case ARM::VLD2q8PseudoWB_register: + case ARM::VLD2q16PseudoWB_register: + case ARM::VLD2q32PseudoWB_register: case ARM::VLD3d8Pseudo: case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: @@ -1131,9 +1150,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD1DUPq8Pseudo: case ARM::VLD1DUPq16Pseudo: case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8Pseudo_UPD: - case ARM::VLD1DUPq16Pseudo_UPD: - case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD1DUPq8PseudoWB_fixed: + case ARM::VLD1DUPq16PseudoWB_fixed: + case ARM::VLD1DUPq32PseudoWB_fixed: + case ARM::VLD1DUPq8PseudoWB_register: + case ARM::VLD1DUPq16PseudoWB_register: + case ARM::VLD1DUPq32PseudoWB_register: case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: @@ -1173,12 +1195,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case ARM::VST2q32Pseudo: - case ARM::VST2d8Pseudo_UPD: - case ARM::VST2d16Pseudo_UPD: - case ARM::VST2d32Pseudo_UPD: - case ARM::VST2q8Pseudo_UPD: - case ARM::VST2q16Pseudo_UPD: - case ARM::VST2q32Pseudo_UPD: + case ARM::VST2d8PseudoWB_fixed: + case ARM::VST2d16PseudoWB_fixed: + case ARM::VST2d32PseudoWB_fixed: + case ARM::VST2q8PseudoWB_fixed: + case ARM::VST2q16PseudoWB_fixed: + case ARM::VST2q32PseudoWB_fixed: + case ARM::VST2d8PseudoWB_register: + case ARM::VST2d16PseudoWB_register: + case ARM::VST2d32PseudoWB_register: + case ARM::VST2q8PseudoWB_register: + case ARM::VST2q16PseudoWB_register: + case ARM::VST2q32PseudoWB_register: case ARM::VST3d8Pseudo: case ARM::VST3d16Pseudo: case ARM::VST3d32Pseudo: @@ -1186,7 +1214,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST3d8Pseudo_UPD: case ARM::VST3d16Pseudo_UPD: case ARM::VST3d32Pseudo_UPD: - case ARM::VST1d64TPseudo_UPD: + case ARM::VST1d64TPseudoWB_fixed: + case ARM::VST1d64TPseudoWB_register: case ARM::VST3q8Pseudo_UPD: case ARM::VST3q16Pseudo_UPD: case ARM::VST3q32Pseudo_UPD: @@ -1203,7 +1232,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST4d8Pseudo_UPD: case ARM::VST4d16Pseudo_UPD: case ARM::VST4d32Pseudo_UPD: - case ARM::VST1d64QPseudo_UPD: + case ARM::VST1d64QPseudoWB_fixed: + case ARM::VST1d64QPseudoWB_register: case ARM::VST4q8Pseudo_UPD: case ARM::VST4q16Pseudo_UPD: case ARM::VST4q32Pseudo_UPD: @@ -1291,12 +1321,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true; - case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true; - case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true; - case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true; - case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true; - case ARM::VTBX4Pseudo: 
ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true; + case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true; + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; + case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; + case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true; + case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; + case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; } return false; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 9bae422..a98dfc3 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -178,10 +178,12 @@ class ARMFastISel : public FastISel { bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt, - bool allocReg); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, + unsigned Alignment = 0, bool isZExt = true, + bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); + bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); bool ARMIsMemCpySmall(uint64_t Len); @@ -227,8 +229,7 @@ class ARMFastISel : public FastISel { // we don't care about implicit defs here, just places we'll need to add a // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.hasOptionalDef()) + if (!MI->hasOptionalDef()) return false; // Look to see if our OptionalDef is defining CPSR or CCR. @@ -702,7 +703,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { TargetRegisterClass* RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(SI->second) .addImm(0)); @@ -898,7 +899,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { ARM::GPRRegisterClass; unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(Addr.Base.FI) .addImm(0)); @@ -937,7 +938,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addFrameIndex(FI); - // ARM halfword load/stores and signed byte loads need an additional operand. + // ARM halfword load/stores and signed byte loads need an additional + // operand. if (useAM3) { signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; MIB.addReg(0); @@ -950,7 +952,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - // ARM halfword load/stores and signed byte loads need an additional operand. + // ARM halfword load/stores and signed byte loads need an additional + // operand. if (useAM3) { signed Imm = (Addr.Offset < 0) ? 
(0x100 | -Addr.Offset) : Addr.Offset; MIB.addReg(0); @@ -963,10 +966,11 @@ } bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, - bool isZExt = true, bool allocReg = true) { + unsigned Alignment, bool isZExt, bool allocReg) { assert(VT.isSimple() && "Non-simple types are invalid here!"); unsigned Opc; bool useAM3 = false; + bool needVMOV = false; TargetRegisterClass *RC; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. @@ -1012,10 +1016,25 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, RC = ARM::GPRRegisterClass; break; case MVT::f32: - Opc = ARM::VLDRS; - RC = TLI.getRegClassFor(VT); + if (!Subtarget->hasVFP2()) return false; + // Unaligned loads need special handling. Floats require word-alignment. + if (Alignment && Alignment < 4) { + needVMOV = true; + VT = MVT::i32; + Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; + RC = ARM::GPRRegisterClass; + } else { + Opc = ARM::VLDRS; + RC = TLI.getRegClassFor(VT); + } break; case MVT::f64: + if (!Subtarget->hasVFP2()) return false; + // FIXME: Unaligned loads need special handling. Doublewords require + // word-alignment. + if (Alignment && Alignment < 4) + return false; + Opc = ARM::VLDRD; RC = TLI.getRegClassFor(VT); break; @@ -1030,6 +1049,16 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3); + + // If we had an unaligned load of a float we've converted it to a regular + // load. Now we must move from the GPR to the FP register. + if (needVMOV) { + unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVSR), MoveReg) + .addReg(ResultReg)); + ResultReg = MoveReg; + } return true; } @@ -1048,12 +1077,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; unsigned ResultReg; - if (!ARMEmitLoad(VT, ResultReg, Addr)) return false; + if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) + return false; UpdateValueMap(I, ResultReg); return true; } -bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { +bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment) { unsigned StrOpc; bool useAM3 = false; switch (VT.getSimpleVT().SimpleTy) { @@ -1101,10 +1132,26 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; - StrOpc = ARM::VSTRS; + // Unaligned stores need special handling. Floats require word-alignment. + if (Alignment && Alignment < 4) { + unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVRS), MoveReg) + .addReg(SrcReg)); + SrcReg = MoveReg; + VT = MVT::i32; + StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12; + } else { + StrOpc = ARM::VSTRS; + } break; case MVT::f64: if (!Subtarget->hasVFP2()) return false; + // FIXME: Unaligned stores need special handling. Doublewords require + // word-alignment.
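+ // (For f32 the same case is handled just above by moving the bits to a
+ // GPR with VMOVRS and using an integer store; an f64 equivalent would
+ // need something like VMOVRRD plus two integer stores, hence the FIXME.)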
+ if (Alignment && Alignment < 4) + return false; + StrOpc = ARM::VSTRD; break; } @@ -1141,7 +1188,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) { if (!ARMComputeAddress(I->getOperand(1), Addr)) return false; - if (!ARMEmitStore(VT, SrcReg, Addr)) return false; + if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment())) + return false; return true; } @@ -1360,7 +1408,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, unsigned SrcReg1 = getRegForValue(Src1Value); if (SrcReg1 == 0) return false; - unsigned SrcReg2; + unsigned SrcReg2 = 0; if (!UseImm) { SrcReg2 = getRegForValue(Src2Value); if (SrcReg2 == 0) return false; @@ -1577,7 +1625,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { (ARM_AM::getSOImmVal(Imm) != -1); } - unsigned Op2Reg; + unsigned Op2Reg = 0; if (!UseImm) { Op2Reg = getRegForValue(I->getOperand(2)); if (Op2Reg == 0) return false; @@ -1716,7 +1764,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { // Use target triple & subtarget features to do actual dispatch. if (Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard) + TM.Options.FloatABIType == FloatABI::Hard) return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); else return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); @@ -1765,21 +1813,23 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::SExt: { - EVT DestVT = VA.getLocVT(); + MVT DestVT = VA.getLocVT(); unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false); assert (ResultReg != 0 && "Failed to emit a sext"); Arg = ResultReg; + ArgVT = DestVT; break; } case CCValAssign::AExt: // Intentional fall-through. Handle AExt and ZExt. case CCValAssign::ZExt: { - EVT DestVT = VA.getLocVT(); + MVT DestVT = VA.getLocVT(); unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); assert (ResultReg != 0 && "Failed to emit a zext"); Arg = ResultReg; + ArgVT = DestVT; break; } case CCValAssign::BCvt: { @@ -2456,7 +2506,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; unsigned ResultReg = MI->getOperand(0).getReg(); - if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false)) + if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) return false; MI->eraseFromParent(); return true; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2d1de6f..06944b1 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -37,7 +37,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // Always eliminate non-leaf frame pointers.
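// (That is: keep the frame pointer whenever elimination is disabled and the
// function actually makes calls, or when stack realignment, variable-sized
// objects, or a taken frame address require one, per the expression below.)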
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || + return ((MF.getTarget().Options.DisableFramePointerElim(MF) && + MFI->hasCalls()) || RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -309,8 +310,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getDesc().isReturn() && - "Can only insert epilog into returning blocks"); + assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 787f6a2..a5fd15b 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, // FIXME: Detect integer instructions properly. const MCInstrDesc &MCID = MI->getDesc(); unsigned Domain = MCID.TSFlags & ARMII::DomainMask; - if (MCID.mayStore()) + if (MI->mayStore()) return false; unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) @@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { MachineInstr *MI = SU->getInstr(); if (!MI->isDebugValue()) { - if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1]) - return Hazard; - // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following // a VMLA / VMLS will cause 4 cycle stall. const MCInstrDesc &MCID = MI->getDesc(); @@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { MachineInstr *DefMI = LastMI; const MCInstrDesc &LastMCID = LastMI->getDesc(); // Skip over one non-VFP / NEON instruction. - if (!LastMCID.isBarrier() && + if (!LastMI->isBarrier() && // On A9, AGU and NEON/FPU are muxed. - !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) && + !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { @@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { void ARMHazardRecognizer::Reset() { LastMI = 0; FpMLxStalls = 0; - ITBlockSize = 0; ScoreboardHazardRecognizer::Reset(); } void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { MachineInstr *MI = SU->getInstr(); - unsigned Opcode = MI->getOpcode(); - if (ITBlockSize) { - --ITBlockSize; - } else if (Opcode == ARM::t2IT) { - unsigned Mask = MI->getOperand(1).getImm(); - unsigned NumTZ = CountTrailingZeros_32(Mask); - assert(NumTZ <= 3 && "Invalid IT mask!"); - ITBlockSize = 4 - NumTZ; - MachineBasicBlock::iterator I = MI; - for (unsigned i = 0; i < ITBlockSize; ++i) { - // Advance to the next instruction, skipping any dbg_value instructions. 
- do { - ++I; - } while (I->isDebugValue()); - ITBlockMIs[ITBlockSize-1-i] = &*I; - } - } - if (!MI->isDebugValue()) { LastMI = MI; FpMLxStalls = 0; diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index 2bc218d..98bfc4c 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -23,6 +23,10 @@ class ARMBaseRegisterInfo; class ARMSubtarget; class MachineInstr; +/// ARMHazardRecognizer handles special constraints that are not expressed in +/// the scheduling itinerary. This is only used during postRA scheduling. The +/// ARM preRA scheduler uses an unspecialized instance of the +/// ScoreboardHazardRecognizer. class ARMHazardRecognizer : public ScoreboardHazardRecognizer { const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; @@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer { MachineInstr *LastMI; unsigned FpMLxStalls; - unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. - MachineInstr *ITBlockMIs[4]; public: ARMHazardRecognizer(const InstrItineraryData *ItinData, @@ -40,7 +42,7 @@ public: const ARMSubtarget &sti, const ScheduleDAG *DAG) : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), - TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {} + TRI(tri), STI(sti), LastMI(0) {} virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void Reset(); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index bc8588f..7473141 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1579,6 +1579,22 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register; case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register; case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register; + case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; + case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; + + case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register; + case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register; + case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register; + case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; + case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; + case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; + + case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register; + case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register; + case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register; + case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; + case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; + case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; } return Opc; // If not one we handle, return it unchanged. } @@ -1646,13 +1662,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0 + // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. 
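// (In the _fixed writeback forms the increment is implicitly the size of
// the access, so no increment operand is needed; only the _register forms
// take a genuine increment register.)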
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode())) + if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); - // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so + // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs != 1 && Opc != ARM::VLD1q64PseudoWB_fixed) || + if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } @@ -1796,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0 + // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode())) + if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. @@ -2810,10 +2826,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2_UPD: { - unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD, - ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; - unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD, - ARM::VLD2q32Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed, + ARM::VLD2d16PseudoWB_fixed, + ARM::VLD2d32PseudoWB_fixed, + ARM::VLD1q64PseudoWB_fixed}; + unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, + ARM::VLD2q16PseudoWB_fixed, + ARM::VLD2q32PseudoWB_fixed }; return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); } @@ -2876,16 +2895,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VST2_UPD: { - unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD, - ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed}; - unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD, - ARM::VST2q32Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed, + ARM::VST2d16PseudoWB_fixed, + ARM::VST2d32PseudoWB_fixed, + ARM::VST1q64PseudoWB_fixed}; + unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, + ARM::VST2q16PseudoWB_fixed, + ARM::VST2q32PseudoWB_fixed }; return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); } case ARMISD::VST3_UPD: { unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD, - ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD }; + ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, ARM::VST3q16Pseudo_UPD, ARM::VST3q32Pseudo_UPD }; @@ -2897,7 +2919,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VST4_UPD: { unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, - ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD }; + ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, ARM::VST4q16Pseudo_UPD, ARM::VST4q32Pseudo_UPD }; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8c4c06f..c6c1f5b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -72,7 +72,7 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for 
debugging only)"), cl::init(true)); -namespace llvm { +namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, @@ -432,7 +432,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else addRegisterClass(MVT::i32, ARM::GPRRegisterClass); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, ARM::SPRRegisterClass); if (!Subtarget->isFPOnlySP()) addRegisterClass(MVT::f64, ARM::DPRRegisterClass); @@ -467,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. + // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively + // supported for v4f32. setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + // FIXME: Code duplication: FDIV and FREM are expanded always, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); + // FIXME: Create unittest. + // In another words, find a way when "copysign" appears in DAG with vector + // operands. setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); + // FIXME: Code duplication: SETCC has custom operation action, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::SETCC, MVT::v2f64, Expand); + // FIXME: Create unittest for FNEG and for FABS. setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); @@ -486,11 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); setOperationAction(ISD::FEXP, MVT::v2f64, Expand); setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); + // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); + setOperationAction(ISD::FSIN, MVT::v4f32, Expand); + setOperationAction(ISD::FCOS, MVT::v4f32, Expand); + setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); + setOperationAction(ISD::FPOW, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); @@ -586,6 +609,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); + // These just redirect to CTTZ and CTLZ on ARM. + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); + // Only ARMv6 has BSWAP. 
if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); @@ -674,7 +701,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BITCAST, MVT::i64, Custom); @@ -712,7 +740,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); } @@ -723,7 +752,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FMA, MVT::f32, Expand); // Various VFP goodness - if (!UseSoftFloat && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { // int <-> fp are custom expanded into bit_convert + ARMISD ops. if (Subtarget->hasVFP2()) { setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); @@ -751,7 +780,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setStackPointerRegisterToSaveRestore(ARM::SP); - if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) + if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() || + !Subtarget->hasVFP2()) setSchedulingPreference(Sched::RegPressure); else setSchedulingPreference(Sched::Hybrid); @@ -1092,7 +1122,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, if (!Subtarget->isAAPCS_ABI()) return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); else if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard && !isVarArg) + getTargetMachine().Options.FloatABIType == FloatABI::Hard && + !isVarArg) return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } @@ -2951,7 +2982,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - if (UnsafeFPMath && + if (getTargetMachine().Options.UnsafeFPMath && (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE || CC == ISD::SETUNE)) { SDValue Result = OptimizeVFPBrcond(Op, DAG); @@ -3978,9 +4009,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } // Use vmov.f32 to materialize other v2f32 and v4f32 splats. - if (VT == MVT::v2f32 || VT == MVT::v4f32) { - ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0)); - int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF()); + if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { + int ImmVal = ARM_AM::getFP32Imm(SplatBits); if (ImmVal != -1) { SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); @@ -6010,7 +6040,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { // executed. 
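A note on the LowerBUILD_VECTOR change above, which feeds the splat bits straight into ARM_AM::getFP32Imm instead of re-extracting a ConstantFPSDNode (and now guards on SplatBitSize == 32): vmov.f32 only accepts VFPv3 modified immediates, and getFP32Imm returns -1 for everything else. A brute-force C++ model of the accepted set (helper name is ours), before the SjLj dispatch scan resumes below:

  #include <cmath>

  // VFPv3 modified immediates are +/- (N/16) * 2^R with N in [16,31]
  // and R in [-3,4]: magnitudes 0.125..31.0 with four fraction bits.
  // 1.0f and -0.5f qualify; 0.0f and 0.1f do not, so such splats fall
  // through to the other lowerings.
  static bool isVFP32Imm(float F) {
    for (int N = 16; N <= 31; ++N)
      for (int R = -3; R <= 4; ++R) {
        float V = std::ldexp(N / 16.0f, R); // exact in float
        if (F == V || F == -V)
          return true;
      }
    return false;
  }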
for (MachineBasicBlock::reverse_iterator II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { - if (!II->getDesc().isCall()) continue; + if (!II->isCall()) continue; DenseMap<unsigned, bool> DefRegs; for (MachineInstr::mop_iterator @@ -6421,13 +6451,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - const MCInstrDesc *MCID = &MI->getDesc(); - if (!MCID->hasPostISelHook()) { + if (!MI->hasPostISelHook()) { assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); return; } + const MCInstrDesc *MCID = &MI->getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional // operand is still set to noreg. If needed, set the optional operand's @@ -6454,7 +6484,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. - if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { + if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { assert(!NewOpc && "Optional cc_out operand required"); return; } @@ -7948,7 +7978,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, // will return -0, so vmin can only be used for unsafe math or if one of // the operands is known to be nonzero. if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && - !UnsafeFPMath && + !DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; @@ -7970,7 +8000,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, // will return +0, so vmax can only be used for unsafe math or if one of // the operands is known to be nonzero. if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && - !UnsafeFPMath && + !DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; Opcode = IsReversed ? 
ARMISD::FMIN : ARMISD::FMAX; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 6940156..80f3773 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -201,21 +201,29 @@ def msr_mask : Operand<i32> { // 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0> // 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0> // 64 64 - <imm> is encoded in imm6<5:0> +def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; } def shr_imm8 : Operand<i32> { let EncoderMethod = "getShiftRight8Imm"; let DecoderMethod = "DecodeShiftRight8Imm"; + let ParserMatchClass = shr_imm8_asm_operand; } +def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; } def shr_imm16 : Operand<i32> { let EncoderMethod = "getShiftRight16Imm"; let DecoderMethod = "DecodeShiftRight16Imm"; + let ParserMatchClass = shr_imm16_asm_operand; } +def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; } def shr_imm32 : Operand<i32> { let EncoderMethod = "getShiftRight32Imm"; let DecoderMethod = "DecodeShiftRight32Imm"; + let ParserMatchClass = shr_imm32_asm_operand; } +def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; } def shr_imm64 : Operand<i32> { let EncoderMethod = "getShiftRight64Imm"; let DecoderMethod = "DecodeShiftRight64Imm"; + let ParserMatchClass = shr_imm64_asm_operand; } //===----------------------------------------------------------------------===// @@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>; class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>; +class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>; + + +class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>, + Requires<[HasVFP2]>; +class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>, + Requires<[HasNEON]>; //===----------------------------------------------------------------------===// // ARM Instruction templates. @@ -1994,73 +2010,111 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> { // VFP/NEON Instruction aliases for type suffixes.
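The machinery that follows exists because NEON/VFP mnemonics accept many data-type suffixes that map to one encoding: vadd.s32, vadd.u32, and vadd.i32 are all the same VADD, while suffixes that do change the encoding (vshr.s16 versus vshr.u16, say) keep distinct operands. A hedged C++ sketch of the suffix folding that the TokenAlias defs at the end of this file request (per ARM ARM Table A7-3); the function name is ours:

  #include <string>

  // Where an instruction ignores signedness, the parser may fall back
  // from ".sN"/".uN" to the ".iN" spelling (and from there to ".N").
  static std::string canonicalSuffix(std::string S) {
    if (S == ".s8"  || S == ".u8")  return ".i8";
    if (S == ".s16" || S == ".u16") return ".i16";
    if (S == ".s32" || S == ".u32") return ".i32";
    if (S == ".s64" || S == ".u64") return ".i64";
    return S;
  }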
class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> : - InstAlias<!strconcat(opc, dt, asm), Result>; -multiclass VFPDT8ReqInstAlias<string opc, string asm, dag Result> { - def I8 : VFPDataTypeInstAlias<opc, ".i8", asm, Result>; - def S8 : VFPDataTypeInstAlias<opc, ".s8", asm, Result>; - def U8 : VFPDataTypeInstAlias<opc, ".u8", asm, Result>; - def F8 : VFPDataTypeInstAlias<opc, ".p8", asm, Result>; -} -// VFPDT8ReqInstAlias plus plain ".8" -multiclass VFPDT8InstAlias<string opc, string asm, dag Result> { - def _8 : VFPDataTypeInstAlias<opc, ".8", asm, Result>; - defm : VFPDT8ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT16ReqInstAlias<string opc, string asm, dag Result> { - def I16 : VFPDataTypeInstAlias<opc, ".i16", asm, Result>; - def S16 : VFPDataTypeInstAlias<opc, ".s16", asm, Result>; - def U16 : VFPDataTypeInstAlias<opc, ".u16", asm, Result>; - def F16 : VFPDataTypeInstAlias<opc, ".p16", asm, Result>; -} -// VFPDT16ReqInstAlias plus plain ".16" -multiclass VFPDT16InstAlias<string opc, string asm, dag Result> { - def _16 : VFPDataTypeInstAlias<opc, ".16", asm, Result>; - defm : VFPDT16ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT32ReqInstAlias<string opc, string asm, dag Result> { - def I32 : VFPDataTypeInstAlias<opc, ".i32", asm, Result>; - def S32 : VFPDataTypeInstAlias<opc, ".s32", asm, Result>; - def U32 : VFPDataTypeInstAlias<opc, ".u32", asm, Result>; - def F32 : VFPDataTypeInstAlias<opc, ".f32", asm, Result>; - def F : VFPDataTypeInstAlias<opc, ".f", asm, Result>; -} -// VFPDT32ReqInstAlias plus plain ".32" -multiclass VFPDT32InstAlias<string opc, string asm, dag Result> { - def _32 : VFPDataTypeInstAlias<opc, ".32", asm, Result>; - defm : VFPDT32ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT64ReqInstAlias<string opc, string asm, dag Result> { - def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>; - def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>; - def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>; - def F64 : VFPDataTypeInstAlias<opc, ".f64", asm, Result>; - def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>; -} -// VFPDT64ReqInstAlias plus plain ".64" -multiclass VFPDT64InstAlias<string opc, string asm, dag Result> { - def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>; - defm : VFPDT64ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> { - def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>; - def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>; - def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>; - def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>; -} -// VFPDT64ReqInstAlias plus plain ".64" -multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> { - def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>; - defm : VFPDT64ReqInstAlias<opc, asm, Result>; -} + InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>; + multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { - defm : VFPDT8InstAlias<opc, asm, Result>; - defm : VFPDT16InstAlias<opc, asm, Result>; - defm : VFPDT32InstAlias<opc, asm, Result>; - defm : VFPDT64InstAlias<opc, asm, Result>; -} -multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> { - defm : VFPDT8InstAlias<opc, asm, Result>; - defm : VFPDT16InstAlias<opc, asm, Result>; - defm : VFPDT32InstAlias<opc, asm, Result>; - defm : VFPDT64NoF64InstAlias<opc, asm, Result>; -} + def : VFPDataTypeInstAlias<opc, ".8", asm, Result>; + def : VFPDataTypeInstAlias<opc, 
".16", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".32", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; +} + +// The same alias classes using AsmPseudo instead, for the more complex +// stuff in NEON that InstAlias can't quite handle. +// Note that we can't use anonymous defm references here like we can +// above, as we care about the ultimate instruction enum names generated, unlike +// for instalias defs. +class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> : + AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>; +multiclass NEONDT8ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I8 : NEONDataTypeAsmPseudoInst<opc, ".i8", asm, iops>; + def S8 : NEONDataTypeAsmPseudoInst<opc, ".s8", asm, iops>; + def U8 : NEONDataTypeAsmPseudoInst<opc, ".u8", asm, iops>; + def P8 : NEONDataTypeAsmPseudoInst<opc, ".p8", asm, iops>; +} +// NEONDT8ReqAsmPseudoInst plus plain ".8" +multiclass NEONDT8AsmPseudoInst<string opc, string asm, dag iops> { + def _8 : NEONDataTypeAsmPseudoInst<opc, ".8", asm, iops>; + defm _ : NEONDT8ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT16ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I16 : NEONDataTypeAsmPseudoInst<opc, ".i16", asm, iops>; + def S16 : NEONDataTypeAsmPseudoInst<opc, ".s16", asm, iops>; + def U16 : NEONDataTypeAsmPseudoInst<opc, ".u16", asm, iops>; + def P16 : NEONDataTypeAsmPseudoInst<opc, ".p16", asm, iops>; +} +// NEONDT16ReqAsmPseudoInst plus plain ".16" +multiclass NEONDT16AsmPseudoInst<string opc, string asm, dag iops> { + def _16 : NEONDataTypeAsmPseudoInst<opc, ".16", asm, iops>; + defm _ : NEONDT16ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT32ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I32 : NEONDataTypeAsmPseudoInst<opc, ".i32", asm, iops>; + def S32 : NEONDataTypeAsmPseudoInst<opc, ".s32", asm, iops>; + def U32 : NEONDataTypeAsmPseudoInst<opc, ".u32", asm, iops>; + def F32 : NEONDataTypeAsmPseudoInst<opc, ".f32", asm, iops>; + def F : NEONDataTypeAsmPseudoInst<opc, ".f", asm, iops>; +} +// NEONDT32ReqAsmPseudoInst plus plain ".32" +multiclass NEONDT32AsmPseudoInst<string opc, string asm, dag iops> { + def _32 : NEONDataTypeAsmPseudoInst<opc, ".32", asm, iops>; + defm _ : NEONDT32ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT64ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; + def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; + def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; + def F64 : NEONDataTypeAsmPseudoInst<opc, ".f64", asm, iops>; + def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; +} +// NEONDT64ReqAsmPseudoInst plus plain ".64" +multiclass NEONDT64AsmPseudoInst<string opc, string asm, dag iops> { + def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; + defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT64NoF64ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; + def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; + def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; + def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; +} +// NEONDT64ReqAsmPseudoInst plus plain ".64" +multiclass NEONDT64NoF64AsmPseudoInst<string opc, string asm, dag iops> { + def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; + defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDTAnyAsmPseudoInst<string 
opc, string asm, dag iops> { + defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT64AsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDTAnyNoF64AsmPseudoInst<string opc, string asm, dag iops> { + defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT64NoF64AsmPseudoInst<opc, asm, iops>; +} + +// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. +def : TokenAlias<".s8", ".i8">; +def : TokenAlias<".u8", ".i8">; +def : TokenAlias<".s16", ".i16">; +def : TokenAlias<".u16", ".i16">; +def : TokenAlias<".s32", ".i32">; +def : TokenAlias<".u32", ".i32">; +def : TokenAlias<".s64", ".i64">; +def : TokenAlias<".u64", ".i64">; + +def : TokenAlias<".i8", ".8">; +def : TokenAlias<".i16", ".16">; +def : TokenAlias<".i32", ".32">; +def : TokenAlias<".i64", ".64">; + +def : TokenAlias<".p8", ".8">; +def : TokenAlias<".p16", ".16">; + +def : TokenAlias<".f32", ".32">; +def : TokenAlias<".f64", ".64">; +def : TokenAlias<".f", ".f32">; +def : TokenAlias<".d", ".f64">; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index be03924..516a080 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -238,27 +238,23 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32); }]>; -/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. -def imm1_15 : ImmLeaf<i32, [{ - return (int32_t)Imm >= 1 && (int32_t)Imm < 16; -}]>; - /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. def imm16_31 : ImmLeaf<i32, [{ return (int32_t)Imm >= 16 && (int32_t)Imm < 32; }]>; -def so_imm_neg : - PatLeaf<(imm), [{ +def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } +def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1; - }], so_imm_neg_XFORM>; + }], so_imm_neg_XFORM> { + let ParserMatchClass = so_imm_neg_asmoperand; +} // Note: this pattern doesn't require an encoder method and such, as it's // only used on aliases (Pat<> and InstAlias<>). The actual encoding -// is handled by the destination instructions, which use t2_so_imm. +// is handled by the destination instructions, which use so_imm. def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } -def so_imm_not : - Operand<i32>, PatLeaf<(imm), [{ +def so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; }], so_imm_not_XFORM> { let ParserMatchClass = so_imm_not_asmoperand; @@ -512,6 +508,14 @@ def arm_i32imm : PatLeaf<(imm), [{ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); }]>; +/// imm0_1 predicate - Immediate in the range [0,1]. +def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; } +def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } + +/// imm0_3 predicate - Immediate in the range [0,3]. +def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } +def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } + /// imm0_7 predicate - Immediate in the range [0,7]. 
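Each ImmAsmOperand here only names a match class; the actual range check is a hand-written predicate on the assembler-parser side. A rough C++ sketch of what such a predicate does (the exact ARMAsmParser member layout is assumed, not quoted), before the imm0_7 family continues below:

  #include "llvm/MC/MCExpr.h"
  using namespace llvm;

  // Sketch of the parser-side predicate behind an ImmAsmOperand such
  // as Imm0_63: resolve the parsed expression to a constant and test
  // the documented range.
  static bool isImm0_63(const MCExpr *Imm) {
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm);
    if (!CE)
      return false;
    int64_t Value = CE->getValue();
    return Value >= 0 && Value < 64;
  }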
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ @@ -520,6 +524,42 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_7AsmOperand; } +/// imm8 predicate - Immediate is exactly 8. +def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; } +def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> { + let ParserMatchClass = Imm8AsmOperand; +} + +/// imm16 predicate - Immediate is exactly 16. +def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; } +def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> { + let ParserMatchClass = Imm16AsmOperand; +} + +/// imm32 predicate - Immediate is exactly 32. +def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; } +def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { + let ParserMatchClass = Imm32AsmOperand; +} + +/// imm1_7 predicate - Immediate in the range [1,7]. +def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; } +def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> { + let ParserMatchClass = Imm1_7AsmOperand; +} + +/// imm1_15 predicate - Immediate in the range [1,15]. +def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; } +def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> { + let ParserMatchClass = Imm1_15AsmOperand; +} + +/// imm1_31 predicate - Immediate in the range [1,31]. +def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; } +def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> { + let ParserMatchClass = Imm1_31AsmOperand; +} + /// imm0_15 predicate - Immediate in the range [0,15]. def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; } def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ @@ -544,6 +584,14 @@ def imm0_32 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_32AsmOperand; } +/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63]. +def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; } +def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 64; +}]> { + let ParserMatchClass = Imm0_63AsmOperand; +} + /// imm0_255 predicate - Immediate in the range [0,255]. def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; } def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { @@ -812,6 +860,9 @@ def addrmode6dup : Operand<i32>, let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6DupAddressOpValue"; + // FIXME: This is close, but not quite right. The alignment specifier is + // different. + let ParserMatchClass = AddrMode6AsmOperand; } // addrmodepc := pc + reg @@ -2753,23 +2804,25 @@ defm STRHT : AI3strT<0b1011, "strht">; // Load / store multiple Instructions. //
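Before the widened arm_ldst_mult below: the two parameters it gains, sfx and P_bit, exist for the system forms of LDM/STM defined further down. The suffix appends the " ^" marker and P_bit drives encoding bit 22, which for these instructions selects the user-register / exception-return variants. A hedged C++ sketch of the resulting LDMIA word (helper name is ours):

  #include <cstdint>

  // LDMIA Rn, {regs}: cond=AL, increment-after, load, no writeback;
  // bit 22 (threaded through the multiclass as P_bit) turns
  // "ldmia r0, {r4-r6}" into the system form "ldmia r0, {r4-r6} ^".
  static uint32_t encodeLDMIA(bool System, unsigned Rn, uint16_t Regs) {
    uint32_t Insn = 0xE8900000; // bits 27..20 = 10001001
    if (System)
      Insn |= 1u << 22;
    return Insn | (Rn << 16) | Regs;
  }

-multiclass arm_ldst_mult<string asm, bit L_bit, Format f, +multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f, InstrItinClass itin, InstrItinClass itin_upd> { // IA is the default, so no need for an explicit suffix on the // mnemonic here. The form without it is the canonical spelling.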
def IA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def IA_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2778,16 +2831,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def DA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def DA_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2796,16 +2851,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def DB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def DB_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2814,16 +2871,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def IB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def IB_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2834,10 +2893,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let neverHasSideEffects = 1 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>; +defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m, + IIC_iLoad_mu>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>; +defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m, + IIC_iStore_mu>; } // 
neverHasSideEffects @@ -2851,6 +2912,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>, RegConstraint<"$Rn = $wb">; +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m, + IIC_iLoad_mu>; + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, + IIC_iStore_mu>; + + + //===----------------------------------------------------------------------===// // Move Instructions. // @@ -4999,6 +5070,32 @@ def : MnemonicAlias<"usubaddx", "usax">; // for isel. def : ARMInstAlias<"mov${s}${p} $Rd, $imm", (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", + (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; +// Same for AND <--> BIC +def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", + (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", + (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", + (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"and${s}${p} $Rdn, $imm", + (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; + +// Likewise, "add Rd, so_imm_neg" -> sub +def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm", + (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"add${s}${p} $Rd, $imm", + (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via so_imm_neg +def : ARMInstAlias<"cmp${p} $Rd, $imm", + (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; +def : ARMInstAlias<"cmn${p} $Rd, $imm", + (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; // The shifter forms of the MOV instruction are aliased to the ASR, LSL, // LSR, ROR, and RRX instructions. @@ -5056,4 +5153,8 @@ def : ARMInstAlias<"ror${s}${p} $Rn, $Rm", // 'mul' instruction can be specified with only two operands. def : ARMInstAlias<"mul${s}${p} $Rn, $Rm", - (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; + (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>; + +// "neg" is an alias for "rsb rd, rn, #0" +def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", + (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f2ca963..c40860d 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -74,9 +74,11 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ let MIOperandInfo = (ops i32imm); }
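A worked example of what the complement and negation aliases above buy: a mask that is not an encodable so_imm often has an encodable complement, so "and r0, r1, #0xffffff00" can be accepted and emitted as "bic r0, r1, #0xff" (ADD/SUB and CMP/CMN pair up the same way via negation). A brute-force C++ sketch of the so_imm test itself, an 8-bit value rotated right by an even amount (helper name is ours):

  #include <cstdint>

  // Un-rotate: rotate V left by every even amount and check whether
  // the result fits in 8 bits.
  static bool isSOImm(uint32_t V) {
    for (unsigned R = 0; R < 32; R += 2) {
      uint32_t Unrot = R ? ((V << R) | (V >> (32 - R))) : V;
      if (Unrot <= 0xFF)
        return true;
    }
    return false;
  }

+// Register list of one D register.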
def VecListOneDAsmOperand : AsmOperandClass { let Name = "VecListOneD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { let ParserMatchClass = VecListOneDAsmOperand; @@ -85,6 +87,7 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { def VecListTwoDAsmOperand : AsmOperandClass { let Name = "VecListTwoD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { let ParserMatchClass = VecListTwoDAsmOperand; @@ -93,6 +96,7 @@ def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { def VecListThreeDAsmOperand : AsmOperandClass { let Name = "VecListThreeD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> { let ParserMatchClass = VecListThreeDAsmOperand; @@ -101,6 +105,7 @@ def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> { def VecListFourDAsmOperand : AsmOperandClass { let Name = "VecListFourD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { let ParserMatchClass = VecListFourDAsmOperand; @@ -109,11 +114,92 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { def VecListTwoQAsmOperand : AsmOperandClass { let Name = "VecListTwoQ"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } -def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwo"> { +def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> { let ParserMatchClass = VecListTwoQAsmOperand; } +// Register list of one D register, with "all lanes" subscripting. +def VecListOneDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListOneDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> { + let ParserMatchClass = VecListOneDAllLanesAsmOperand; +} +// Register list of two D registers, with "all lanes" subscripting. +def VecListTwoDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListTwoDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> { + let ParserMatchClass = VecListTwoDAllLanesAsmOperand; +} + +// Register list of one D register, with byte lane subscripting. +def VecListOneDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListOneDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
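Briefly interrupting the lane-indexed operand definitions (the word-lane variants follow): each of these classes carries a (register, lane) pair in a single parsed operand, and the new addVecListIndexedOperands render method splits it back into two MCInst operands. Roughly, in C++, with the ARMOperand field names assumed rather than quoted:

  // Sketch of the render method named above, as a member of the
  // parser's operand class (headers elided): one parsed vector-list
  // operand becomes a register operand plus a lane-index immediate.
  void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
    assert(N == 2 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
    Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
  }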
+def VecListOneDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of two D registers, with byte lane subscripting. +def VecListTwoDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListTwoDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. //===----------------------------------------------------------------------===// @@ -272,12 +358,23 @@ class VLDQWBregisterPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr, rGPR:$offset), itin, "$addr.addr = $wb">; + class VLDQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; class VLDQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; +class VLDQQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; + + class VLDQQQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, "$src = $dst">; @@ -462,31 +559,23 @@ defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> +class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin> : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), - (ins addrmode6:$Rn), IIC_VLD2, - "vld2", Dt, "$Vd, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2Q<bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs VdTy:$Vd), - (ins addrmode6:$Rn), IIC_VLD2x2, + (ins addrmode6:$Rn), itin, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>; -def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", 
VecListTwoD>; -def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; -def VLD2q8 : VLD2Q<{0,0,?,?}, "8", VecListFourD>; -def VLD2q16 : VLD2Q<{0,1,?,?}, "16", VecListFourD>; -def VLD2q32 : VLD2Q<{1,0,?,?}, "32", VecListFourD>; +def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; +def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; +def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; @@ -497,47 +586,56 @@ def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; // ...with address register writeback: -class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, - "vld2", Dt, "$Vd, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2QWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, - "vld2", Dt, "$Vd, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy, InstrItinClass itin> { + def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn), itin, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), itin, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; -def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; -def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; -def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8", VecListFourD>; -def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>; -def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>; +defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; +defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; +defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; -def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; +def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; +def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; +def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; -def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; // ...with double-spaced registers -def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>; -def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -997,9 +1095,11 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; // VLD1DUP : Vector Load (single element to all lanes) class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn), - IIC_VLD1dup, "vld1", 
Dt, "\\{$Vd[]\\}, $Rn", "", - [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), + (ins addrmode6dup:$Rn), + IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", + [(set VecListOneDAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1025,9 +1125,9 @@ def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { class VLD1QDUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2), + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1038,32 +1138,63 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; // ...with address register writeback: -class VLD1DUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -class VLD1QDUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">; -def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">; -def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">; +defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; +defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; +defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; -def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">; -def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">; -def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">; +defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; +defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; +defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; -def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; +def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; +def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; +def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) class VLD2DUP<bits<4> op7_4, string Dt> @@ -1329,94 +1460,109 @@ def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; // ...with 3 registers class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + (ins addrmode6:$Rn, VecListThreeD:$Vd), + IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D3WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D3WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), + IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8T : VST1D3<{0,0,0,?}, "8">; -def VST1d16T : VST1D3<{0,1,0,?}, "16">; -def VST1d32T : VST1D3<{1,0,0,?}, "32">; -def VST1d64T : VST1D3<{1,1,0,?}, "64">; +def VST1d8T : VST1D3<{0,0,0,?}, "8">; +def VST1d16T : VST1D3<{0,1,0,?}, "16">; +def VST1d32T : VST1D3<{1,0,0,?}, "32">; +def VST1d64T : VST1D3<{1,1,0,?}, "64">; -def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">; -def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">; -def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">; -def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">; +defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; +defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; +defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; +defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; -def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; -def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; +def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; // ...with 4 registers class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", + (ins addrmode6:$Rn, VecListFourD:$Vd), + IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D4WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u, - "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D4WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8Q : VST1D4<{0,0,?,?}, "8">; -def VST1d16Q : VST1D4<{0,1,?,?}, "16">; -def VST1d32Q : VST1D4<{1,0,?,?}, "32">; -def VST1d64Q : VST1D4<{1,1,?,?}, "64">; +def VST1d8Q : VST1D4<{0,0,?,?}, "8">; +def VST1d16Q : VST1D4<{0,1,?,?}, "16">; +def VST1d32Q : VST1D4<{1,0,?,?}, "32">; +def VST1d64Q : VST1D4<{1,1,?,?}, "64">; -def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">; -def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">; -def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">; -def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">; +defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; +defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; +defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; +defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; -def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; -def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; +def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) -class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), - IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; -} -class VST2Q<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", - "", []> { +class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), + itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">; -def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">; -def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; -def VST2q8 : VST2Q<{0,0,?,?}, "8">; -def VST2q16 : VST2Q<{0,1,?,?}, "16">; -def VST2q32 : VST2Q<{1,0,?,?}, "32">; +def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; +def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; +def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; def VST2d8Pseudo : VSTQPseudo<IIC_VST2>; def VST2d16Pseudo : VSTQPseudo<IIC_VST2>; @@ -1427,47 +1573,76 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; // ...with address register writeback: -class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2), - IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, + 
RegisterOperand VdTy> { + def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -class VST2QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u, - "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST2QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">; -def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">; -def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; -def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">; -def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">; -def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">; +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; +def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; -def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; // ...with double-spaced registers -def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">; -def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">; -def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">; -def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">; -def 
VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">; -def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -1741,10 +1916,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>; // ...with address register writeback: class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, - "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", - "$addr.addr = $wb", []> { + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, + "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; } @@ -2573,9 +2748,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // Long shift by immediate. class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm, + (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm, IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; @@ -2805,14 +2980,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, v4i32, v4i32, OpNode, Commutable>; } -multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> { - def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"), - v2i32, ShOp>; - def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"), +multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { + def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; + def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", v4i32, v2i32, ShOp>; } @@ -3477,15 +3649,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, SDNode OpNode> { def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> { + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> { + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - 
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> { + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -3574,7 +3746,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; @@ -4285,18 +4457,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, - ValueType OpTy, SDNode OpNode> + ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, - ResTy, OpTy, OpNode> { + ResTy, OpTy, ImmTy, OpNode> { let Inst{21-16} = op21_16; let DecoderMethod = "DecodeVSHLMaxInstruction"; } def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", - v8i16, v8i8, NEONvshlli>; + v8i16, v8i8, imm8, NEONvshlli>; def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", - v4i32, v4i16, NEONvshlli>; + v4i32, v4i16, imm16, NEONvshlli>; def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", - v2i64, v2i32, NEONvshlli>; + v2i64, v2i32, imm32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", @@ -4469,10 +4641,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; def : InstAlias<"vmov${p} $Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm", - (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm", - (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VMOV : Vector Move (Immediate) @@ -4932,34 +5100,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>; // VEXT : Vector Extract -class VEXTd<string OpcodeStr, string Dt, ValueType Ty> +class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm, + (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), - (Ty DPR:$Vm), imm:$index)))]> { + (Ty DPR:$Vm), imm:$index)))]> { bits<4> index; let Inst{11-8} = index{3-0}; } -class VEXTq<string OpcodeStr, string Dt, ValueType Ty> +class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd), - (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm, + (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm, IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn), - (Ty QPR:$Vm), imm:$index)))]> { + (Ty QPR:$Vm), imm:$index)))]> { bits<4> index; let Inst{11-8} = index{3-0}; } -def VEXTd8 : VEXTd<"vext", "8", v8i8> { +def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { let Inst{11-8} = index{3-0}; } -def VEXTd16 : VEXTd<"vext", "16", v4i16> { +def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { let Inst{11-9} = index{2-0}; let Inst{8} = 0b0; } -def VEXTd32 : VEXTd<"vext", "32", v2i32> { +def VEXTd32 : 
VEXTd<"vext", "32", v2i32, imm0_1> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } @@ -4968,17 +5136,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (i32 imm:$index))), (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>; -def VEXTq8 : VEXTq<"vext", "8", v16i8> { +def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> { let Inst{11-8} = index{3-0}; } -def VEXTq16 : VEXTq<"vext", "16", v8i16> { +def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> { let Inst{11-9} = index{2-0}; let Inst{8} = 0b0; } -def VEXTq32 : VEXTq<"vext", "32", v4i32> { +def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } +def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> { + let Inst{11} = index{0}; + let Inst{10-8} = 0b000; +} def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))), @@ -5026,17 +5198,17 @@ def VTBL1 let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>; + (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>; + (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), + (ins VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>; + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 def VTBL2Pseudo @@ -5056,18 +5228,18 @@ def VTBX1 let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>; + (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), + (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX4 - : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", + : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 @@ -5207,11 +5379,83 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; // Assembler aliases // -// VAND/VEOR/VORR accept but do not require a type suffix. +def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; +def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; + + +// VADD two-operand aliases. 
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSUB two-operand aliases. +def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VADDW two-operand aliases. +def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm", + (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm", + (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm", + (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm", + (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm", + (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", + (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; + +// VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", @@ -5220,245 +5464,450 @@ defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; - -// VLD1 requires a size suffix, but also accepts type specific variants. -// Load one D register. -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; - -// Load two D registers. 
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; - -// Load three D registers. -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; - - -// Load four D registers. 
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; - -// VST1 requires a size suffix, but also accepts type specific variants. -// Store one D register. -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; - -// Store two D registers. 
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q8wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q16wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q32wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q64wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; - -// FIXME: The three and four register VST1 instructions haven't been moved -// to the VecList* encoding yet, so we can't do assembly parsing support -// for them. Uncomment these when that happens. -// Load three D registers. -//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; -//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; -//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; -//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; - -// Load four D registers. -//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; - - -// VTRN instructions data type suffix aliases for more-specific types. -defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm", - (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm", - (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm", - (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; - -defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm", - (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm", - (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm", - (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>; +// ... 
two-operand aliases +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VMUL two-operand aliases. +def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm", + (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm", + (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm", + (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm", + (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm", + (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm", + (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm", + (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm", + (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm", + (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm", + (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane", + (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane", + (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane", + (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane", + (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane", + (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane", + (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +// VQADD (register) two-operand aliases. 
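+// e.g. (arbitrary registers) "vqadd.s8 d0, d1" is assumed to assemble as
+// "vqadd.s8 d0, d0, d1", and likewise for the Q-register forms below.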
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHL (immediate) two-operand aliases. +def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>; + +// VSHL (register) two-operand aliases. 
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHL (immediate) two-operand aliases. +def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +// VLD1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
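+// e.g. (arbitrary registers and lane) "vld2.16 {d0[1], d1[1]}, [r0]" and
+// its "..., [r0]!" / "..., [r0], r2" writeback forms are assumed to match
+// the pseudos below.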
+defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr",
+                 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr",
+                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr",
+                 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!",
+                 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!",
+                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!",
+                 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+        NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+                 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+        NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+        NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+                 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr",
+                 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr",
+                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr",
+                 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!",
+                 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!",
+                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!",
+                 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+        NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+                 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+        NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+        NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+                 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+                      rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
+// D-register versions.
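+// e.g. (arbitrary registers) "vcle.s16 d0, d1, d2" is assumed to be
+// encoded as "vcge.s16 d0, d2, d1".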
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", + (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", + (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", + (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", + (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", + (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", + (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", + (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. +def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", + (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", + (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", + (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", + (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", + (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", + (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", + (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. +// D-register versions. +def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", + (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", + (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", + (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", + (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", + (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", + (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", + (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. 
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", + (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", + (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", + (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", + (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", + (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", + (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", + (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// Two-operand variants for VEXT +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", + (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>; + +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", + (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm", + (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>; + +// Two-operand variants for VQDMULH +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, +// these should restrict to just the Q register variants, but the register +// classes are enough to match correctly regardless, so we keep it simple +// and just use MnemonicAlias. +def : NEONMnemonicAlias<"vbicq", "vbic">; +def : NEONMnemonicAlias<"vandq", "vand">; +def : NEONMnemonicAlias<"veorq", "veor">; +def : NEONMnemonicAlias<"vorrq", "vorr">; + +def : NEONMnemonicAlias<"vmovq", "vmov">; +def : NEONMnemonicAlias<"vmvnq", "vmvn">; +// Explicit versions for floating point so that the FPImm variants get +// handled early. The parser gets confused otherwise. 
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; +def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; + +def : NEONMnemonicAlias<"vaddq", "vadd">; +def : NEONMnemonicAlias<"vsubq", "vsub">; + +def : NEONMnemonicAlias<"vminq", "vmin">; +def : NEONMnemonicAlias<"vmaxq", "vmax">; + +def : NEONMnemonicAlias<"vmulq", "vmul">; + +def : NEONMnemonicAlias<"vabsq", "vabs">; + +def : NEONMnemonicAlias<"vshlq", "vshl">; +def : NEONMnemonicAlias<"vshrq", "vshr">; + +def : NEONMnemonicAlias<"vcvtq", "vcvt">; + +def : NEONMnemonicAlias<"vcleq", "vcle">; +def : NEONMnemonicAlias<"vceqq", "vceq">; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index c6cc98d..ac1a229 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1131,9 +1131,6 @@ def tRSB : // A8.6.141 "rsb", "\t$Rd, $Rn, #0", [(set tGPR:$Rd, (ineg tGPR:$Rn))]>; -def : tInstAlias<"neg${s}${p} $Rd, $Rm", - (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; - // Subtract with carry register let Uses = [CPSR] in def tSBC : // A8.6.151 @@ -1435,3 +1432,8 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>; // nothing). def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; + +// "neg" is and alias for "rsb rd, rn, #0" +def : tInstAlias<"neg${s}${p} $Rd, $Rm", + (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; + diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 6129fa3..981592c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -80,18 +80,19 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ // only used on aliases (Pat<> and InstAlias<>). The actual encoding // is handled by the destination instructions, which use t2_so_imm. def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; } -def t2_so_imm_not : Operand<i32>, - PatLeaf<(imm), [{ +def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1; }], t2_so_imm_not_XFORM> { let ParserMatchClass = t2_so_imm_not_asmoperand; } // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. -def t2_so_imm_neg : Operand<i32>, - PatLeaf<(imm), [{ +def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } +def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; -}], t2_so_imm_neg_XFORM>; +}], t2_so_imm_neg_XFORM> { + let ParserMatchClass = t2_so_imm_neg_asmoperand; +} /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. 
def imm0_4095 : Operand<i32>, @@ -1333,7 +1334,7 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), let mayStore = 1, neverHasSideEffects = 1 in { def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins GPRnopc:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { @@ -1357,13 +1358,13 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), } // mayStore = 1, neverHasSideEffects = 1 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, addr_offset_none:$Rn, + (ins GPRnopc:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStore_iu, "str", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, - (post_store rGPR:$Rt, addr_offset_none:$Rn, + (post_store GPRnopc:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$offset))]>; def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), @@ -3971,6 +3972,18 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; +// STMIA/STMIA_UPD aliases w/o the optional .w suffix +def : t2InstAlias<"stm${p} $Rn, $regs", + (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"stm${p} $Rn!, $regs", + (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + +// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix +def : t2InstAlias<"ldm${p} $Rn, $regs", + (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"ldm${p} $Rn!, $regs", + (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + // STMDB/STMDB_UPD aliases w/ the optional .w suffix def : t2InstAlias<"stmdb${p}.w $Rn, $regs", (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>; @@ -4084,8 +4097,50 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", // for isel. def : t2InstAlias<"mov${p} $Rd, $imm", (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +def : t2InstAlias<"mvn${p} $Rd, $imm", + (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +// Same for AND <--> BIC +def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm", + (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"bic${s}${p} $Rdn, $imm", + (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm", + (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"and${s}${p} $Rdn, $imm", + (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +// Likewise, "add Rd, t2_so_imm_neg" -> sub +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${s}${p} $Rd, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via t2_so_imm_neg +def : t2InstAlias<"cmp${p} $Rd, $imm", + (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; +def : t2InstAlias<"cmn${p} $Rd, $imm", + (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; // Wide 'mul' encoding can be specified with only two operands. 
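+// e.g. (arbitrary registers) "mul r1, r2" is assumed to assemble as
+// "mul r1, r2, r1"; since multiplication commutes, the result matches
+// "mul r1, r1, r2".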
def : t2InstAlias<"mul${p} $Rn, $Rm", - (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>; + (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>; + +// "neg" is and alias for "rsb rd, rn, #0" +def : t2InstAlias<"neg${s}${p} $Rd, $Rm", + (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>; + +// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for +// these, unfortunately. +def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift", + (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift", + (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; + +// ADR w/o the .w suffix +def : t2InstAlias<"adr${p} $Rd, $addr", + (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e420135..5d43556 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1160,18 +1160,64 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), //===----------------------------------------------------------------------===// // Assembler aliases. // +// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to +// support them all, but supporting at least some of the basics is +// good to be friendly. +def : VFP2MnemonicAlias<"flds", "vldr">; +def : VFP2MnemonicAlias<"fldd", "vldr">; +def : VFP2MnemonicAlias<"fmrs", "vmov">; +def : VFP2MnemonicAlias<"fmsr", "vmov">; +def : VFP2MnemonicAlias<"fsqrts", "vsqrt">; +def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">; +def : VFP2MnemonicAlias<"fadds", "vadd.f32">; +def : VFP2MnemonicAlias<"faddd", "vadd.f64">; +def : VFP2MnemonicAlias<"fmrdd", "vmov">; +def : VFP2MnemonicAlias<"fmrds", "vmov">; +def : VFP2MnemonicAlias<"fmrrd", "vmov">; +def : VFP2MnemonicAlias<"fmdrr", "vmov">; +def : VFP2MnemonicAlias<"fmuld", "vmul.f64">; +def : VFP2MnemonicAlias<"fnegs", "vneg.f32">; +def : VFP2MnemonicAlias<"fnegd", "vneg.f64">; +def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">; +def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">; +def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">; +def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">; +def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">; +def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">; +def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">; +def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">; +def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">; +def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">; +def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">; +def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">; +def : VFP2MnemonicAlias<"fsts", "vstr">; +def : VFP2MnemonicAlias<"fstd", "vstr">; +def : VFP2MnemonicAlias<"fmacd", "vmla.f64">; +def : VFP2MnemonicAlias<"fmacs", "vmla.f32">; def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; +def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", + (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm", + (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm", + (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm", + (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; + +// No need for the size suffix on VSQRT. It's implied by the register classes. +def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; // VLDR/VSTR accept an optional type suffix. 
-defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr", - (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr", - (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr", - (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr", - (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr", + (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr", + (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr", + (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr", + (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; // VMUL has a two-operand form (implied destination operand) def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c8728f4..6712fb6 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -33,6 +33,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -1471,19 +1472,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, while (++I != E) { if (I->isDebugValue() || MemOps.count(&*I)) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects()) + if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) return false; - if (isLd && MCID.mayStore()) + if (isLd && I->mayStore()) return false; if (!isLd) { - if (MCID.mayLoad()) + if (I->mayLoad()) return false; // It's not safe to move the first 'str' down. // str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (MCID.mayStore()) + if (I->mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { @@ -1773,8 +1773,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { while (MBBI != E) { for (; MBBI != E; ++MBBI) { MachineInstr *MI = MBBI; - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.isCall() || MCID.isTerminator()) { + if (MI->isCall() || MI->isTerminator()) { // Stop at barriers. 
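// (Editorial note, not part of the patch: IsSafeAndProfitableToMove above
// and this barrier check encode the same reordering discipline -- a load may
// not cross anything that mayStore(), a store may not cross anything that
// mayLoad() or mayStore(), and calls/terminators stop rescheduling outright.
// E.g. hoisting "ldr r2, [r0]" above a preceding "str r1, [r0]" would read
// the stale value.)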
++MBBI; break; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 6cbb24b..61b75cb 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -38,22 +38,25 @@ extern "C" void LLVMInitializeARMTarget() { /// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { // Default to soft float ABI - if (FloatABIType == FloatABI::Default) - FloatABIType = FloatABI::Soft; + if (Options.FloatABIType == FloatABI::Default) + this->Options.FloatABIType = FloatABI::Soft; } ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget), + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "v128:32:128-v64:32:64-n32-S32") : @@ -73,9 +76,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), @@ -143,10 +147,16 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) { } bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) { - if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb()) - PM.add(createThumb2SizeReductionPass()); + if (Subtarget.isThumb2()) { + if (!Subtarget.prefers32BitThumb()) + PM.add(createThumb2SizeReductionPass()); + + // The constant island pass works on unbundled instructions.
+ PM.add(createUnpackMachineBundlesPass()); + } PM.add(createARMConstantIslandPass()); + return true; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index a1f517b..cd77822 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -41,6 +41,7 @@ private: public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); @@ -71,6 +72,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); @@ -112,6 +114,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 19defa1..721a225 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -36,6 +36,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); + StructorOutputOrder = Structors::PriorityOrder; LSDASection = NULL; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bb83e5e..cd86065 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -39,10 +39,15 @@ namespace { class ARMOperand; +enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; + class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + // Map of register aliases registered via the .req directive. + StringMap<unsigned> RegisterReqs; + struct { ARMCC::CondCodes Cond; // Condition for IT block. unsigned Mask:4; // Condition mask for instructions.
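A standalone sketch (editorial illustration; the patch itself uses llvm::StringMap, approximated here with std::map) of how the RegisterReqs table just declared gets consulted when ordinary register-name matching fails:

#include <map>
#include <string>
static std::map<std::string, unsigned> RegisterReqs; // name -> register number
// ".req" installs a binding, e.g. "fooreg .req r5" => RegisterReqs["fooreg"] = 5;
// ".unreq" erases it. Lookup is a fallback after the builtin names miss:
int lookupReqAlias(const std::string &Name) {
  std::map<std::string, unsigned>::const_iterator It = RegisterReqs.find(Name);
  return It == RegisterReqs.end() ? -1 : (int)It->second;
}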
@@ -90,9 +95,12 @@ class ARMAsmParser : public MCTargetAsmParser { unsigned &ShiftAmount); bool parseDirectiveWord(unsigned Size, SMLoc L); bool parseDirectiveThumb(SMLoc L); + bool parseDirectiveARM(SMLoc L); bool parseDirectiveThumbFunc(SMLoc L); bool parseDirectiveCode(SMLoc L); bool parseDirectiveSyntax(SMLoc L); + bool parseDirectiveReq(StringRef Name, SMLoc L); + bool parseDirectiveUnreq(SMLoc L); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, @@ -161,6 +169,7 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); // Asm Match Converter Methods bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, @@ -271,6 +280,8 @@ class ARMOperand : public MCParsedAsmOperand { k_DPRRegisterList, k_SPRRegisterList, k_VectorList, + k_VectorListAllLanes, + k_VectorListIndexed, k_ShiftedRegister, k_ShiftedImmediate, k_ShifterImmediate, @@ -324,6 +335,8 @@ class ARMOperand : public MCParsedAsmOperand { struct { unsigned RegNum; unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; } VectorList; struct { @@ -409,6 +422,8 @@ public: Registers = o.Registers; break; case k_VectorList: + case k_VectorListAllLanes: + case k_VectorListIndexed: VectorList = o.VectorList; break; case k_CoprocNum: @@ -562,6 +577,22 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 256; } + bool isImm0_1() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 2; + } + bool isImm0_3() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 4; + } bool isImm0_7() const { if (Kind != k_Immediate) return false; @@ -586,6 +617,94 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 32; } + bool isImm0_63() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 64; + } + bool isImm8() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 8; + } + bool isImm16() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 16; + } + bool isImm32() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 32; + } + bool isShrImm8() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 8; + } + bool isShrImm16() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) 
return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 16; + } + bool isShrImm32() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 32; + } + bool isShrImm64() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 64; + } + bool isImm1_7() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 8; + } + bool isImm1_15() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 16; + } + bool isImm1_31() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 32; + } bool isImm1_16() const { if (Kind != k_Immediate) return false; @@ -676,6 +795,14 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(~Value) != -1; } + bool isARMSOImmNeg() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getSOImmVal(-Value) != -1; + } bool isT2SOImm() const { if (Kind != k_Immediate) return false; @@ -692,6 +819,14 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(~Value) != -1; } + bool isT2SOImmNeg() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getT2SOImmVal(-Value) != -1; + } bool isSetEndImm() const { if (Kind != k_Immediate) return false; @@ -892,9 +1027,9 @@ public: if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [-255, -1]. - if (!Memory.OffsetImm) return true; + if (!Memory.OffsetImm) return false; int64_t Val = Memory.OffsetImm->getValue(); - return Val > -256 && Val < 0; + return (Val == INT32_MIN) || (Val > -256 && Val < 0); } bool isMemUImm12Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -940,31 +1075,75 @@ public: bool isProcIFlags() const { return Kind == k_ProcIFlags; } // NEON operands. 
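Two patterns from the hunks above, distilled into a standalone C++ sketch (editorial illustration, not the patch's code): every new isImmX_Y/isShrImmN predicate is a constant-range test, and the negative-offset predicate now admits INT32_MIN, the sentinel this parser uses for a literal "#-0" (negative zero is a distinct encoding from +0 in ARM addressing):

#include <cassert>
#include <cstdint>
static bool inRange(int64_t V, int64_t Lo, int64_t Hi) {
  return V >= Lo && V <= Hi; // isImm0_63 -> [0,63]; isShrImm8 -> [1,8]; ...
}
static bool isNegImm8Offset(int64_t Val) { // shape of the fixed predicate
  return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
int main() {
  assert(inRange(63, 0, 63) && !inRange(64, 0, 63));
  assert(isNegImm8Offset(INT32_MIN) && !isNegImm8Offset(0)); // "#-0" vs "#0"
  return 0;
}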
+ bool isSingleSpacedVectorList() const { + return Kind == k_VectorList && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorList() const { + return Kind == k_VectorList && VectorList.isDoubleSpaced; + } bool isVecListOneD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 1; } bool isVecListTwoD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 2; } bool isVecListThreeD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 3; } bool isVecListFourD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 4; } bool isVecListTwoQ() const { - if (Kind != k_VectorList) return false; - //FIXME: We haven't taught the parser to handle by-two register lists - // yet, so don't pretend to know one. - return VectorList.Count == 2 && false; + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 2; + } + + bool isVecListOneDAllLanes() const { + if (Kind != k_VectorListAllLanes) return false; + return VectorList.Count == 1; + } + + bool isVecListTwoDAllLanes() const { + if (Kind != k_VectorListAllLanes) return false; + return VectorList.Count == 2; + } + + bool isVecListOneDByteIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 7; + } + + bool isVecListOneDHWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 3; + } + + bool isVecListOneDWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 1; + } + + bool isVecListTwoDByteIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 7; + } + + bool isVecListTwoDHWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 3; + } + + bool isVecListTwoDWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 1; } bool isVectorIndex8() const { @@ -1233,6 +1412,14 @@ public: Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } + void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a t2_so_imm, but we have its + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a so_imm, but we have its bitwise @@ -1241,6 +1428,14 @@ public: Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } + void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a so_imm, but we have its + // negation in the assembly source, so twiddle it here. 
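// (Editorial note, not part of the patch: this negation is safe because the
// matcher only selects this operand form after isARMSOImmNeg()/isT2SOImmNeg()
// above proved that -Value is encodable; e.g. a "cmp r0, #-255" in the source
// is matched as CMN and the operand emitted here is +255.)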
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); @@ -1527,37 +1722,15 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); } - void addVecListOneDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } - - void addVecListTwoDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } - - void addVecListThreeDOperands(MCInst &Inst, unsigned N) const { + void addVecListOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); } - void addVecListFourDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } - - void addVecListTwoQOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. + void addVecListIndexedOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex)); } void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { @@ -1780,10 +1953,32 @@ public: } static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count, - SMLoc S, SMLoc E) { + bool isDoubleSpaced, SMLoc S, SMLoc E) { ARMOperand *Op = new ARMOperand(k_VectorList); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_VectorListAllLanes); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count, + unsigned Index, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_VectorListIndexed); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->VectorList.LaneIndex = Index; Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -1982,6 +2177,14 @@ void ARMOperand::print(raw_ostream &OS) const { OS << "<vector_list " << VectorList.Count << " * " << VectorList.RegNum << ">"; break; + case k_VectorListAllLanes: + OS << "<vector_list(all lanes) " << VectorList.Count << " * " + << VectorList.RegNum << ">"; + break; + case k_VectorListIndexed: + OS << "<vector_list(lane " << VectorList.LaneIndex << ") " + << VectorList.Count << " * " << VectorList.RegNum << ">"; + break; case k_Token: OS << "'" << getToken() << "'"; break; @@ -2000,7 +2203,9 @@ static unsigned 
MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + StartLoc = Parser.getTok().getLoc(); RegNo = tryParseRegister(); + EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } @@ -2013,8 +2218,6 @@ int ARMAsmParser::tryParseRegister() { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return -1; - // FIXME: Validate register for the current architecture; we have to do - // validation later, so maybe there is no need for this here. std::string lowerCase = Tok.getString().lower(); unsigned RegNum = MatchRegisterName(lowerCase); if (!RegNum) { @@ -2023,9 +2226,34 @@ int ARMAsmParser::tryParseRegister() { .Case("r14", ARM::LR) .Case("r15", ARM::PC) .Case("ip", ARM::R12) + // Additional register name aliases for 'gas' compatibility. + .Case("a1", ARM::R0) + .Case("a2", ARM::R1) + .Case("a3", ARM::R2) + .Case("a4", ARM::R3) + .Case("v1", ARM::R4) + .Case("v2", ARM::R5) + .Case("v3", ARM::R6) + .Case("v4", ARM::R7) + .Case("v5", ARM::R8) + .Case("v6", ARM::R9) + .Case("v7", ARM::R10) + .Case("v8", ARM::R11) + .Case("sb", ARM::R9) + .Case("sl", ARM::R10) + .Case("fp", ARM::R11) .Default(0); } - if (!RegNum) return -1; + if (!RegNum) { + // Check for aliases registered via .req. + StringMap<unsigned>::const_iterator Entry = + RegisterReqs.find(Tok.getIdentifier()); + // If no match, return failure. + if (Entry == RegisterReqs.end()) + return -1; + Parser.Lex(); // Eat identifier token. + return Entry->getValue(); + } Parser.Lex(); // Eat identifier token. @@ -2045,6 +2273,7 @@ int ARMAsmParser::tryParseShiftRegister( std::string lowerCase = Tok.getString().lower(); ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase) + .Case("asl", ARM_AM::lsl) .Case("lsl", ARM_AM::lsl) .Case("lsr", ARM_AM::lsr) .Case("asr", ARM_AM::asr) @@ -2073,7 +2302,8 @@ int ARMAsmParser::tryParseShiftRegister( ShiftReg = SrcReg; } else { // Figure out if this is shifted by a constant or a register (for non-RRX). - if (Parser.getTok().is(AsmToken::Hash)) { + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); const MCExpr *ShiftExpr = 0; @@ -2446,6 +2676,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the comma. RegLoc = Parser.getTok().getLoc(); int OldReg = Reg; + const AsmToken RegTok = Parser.getTok(); Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); @@ -2459,8 +2690,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); // List must be monotonically increasing. - if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg)) + if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) return Error(RegLoc, "register list not in ascending order"); + if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) { + Warning(RegLoc, "duplicated register (" + RegTok.getString() + + ") in register list"); + continue; + } // VFP register lists must also be contiguous. // It's OK to use the enumeration values directly here, as the // VFP register classes have the enum sorted properly. @@ -2477,13 +2713,55 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return Error(E, "'}' expected"); Parser.Lex(); // Eat '}' token. + // Push the register list operand.
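// (Editorial note, not part of the patch: at this point the register list has
// been fully validated -- a register lower than its predecessor is still a
// hard error, while an exact duplicate now only draws a warning and is
// dropped via the 'continue' above.)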
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E)); + + // The ARM system instruction variants for LDM/STM have a '^' token here. + if (Parser.getTok().is(AsmToken::Caret)) { + Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc())); + Parser.Lex(); // Eat '^' token. + } + return false; } +// Helper function to parse the lane index for vector lists. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { + Index = 0; // Always return a defined index value. + if (Parser.getTok().is(AsmToken::LBrac)) { + Parser.Lex(); // Eat the '['. + if (Parser.getTok().is(AsmToken::RBrac)) { + // "Dn[]" is the 'all lanes' syntax. + LaneKind = AllLanes; + Parser.Lex(); // Eat the ']'. + return MatchOperand_Success; + } + if (Parser.getTok().is(AsmToken::Integer)) { + int64_t Val = Parser.getTok().getIntVal(); + // Make this range check context sensitive for .8, .16, .32. + if (Val < 0 || Val > 7) + Error(Parser.getTok().getLoc(), "lane index out of range"); + Index = Val; + LaneKind = IndexedLane; + Parser.Lex(); // Eat the token. + if (Parser.getTok().isNot(AsmToken::RBrac)) + Error(Parser.getTok().getLoc(), "']' expected"); + Parser.Lex(); // Eat the ']'. + return MatchOperand_Success; + } + Error(Parser.getTok().getLoc(), "lane index must be empty or an integer"); + return MatchOperand_ParseFail; + } + LaneKind = NoLanes; + return MatchOperand_Success; +} + // parse a vector register list ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + VectorLaneTy LaneKind; + unsigned LaneIndex; SMLoc S = Parser.getTok().getLoc(); // As an extension (to match gas), support a plain D register or Q register // (without enclosing curly braces) as a single or double entry list, @@ -2494,12 +2772,48 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_NoMatch; SMLoc E = Parser.getTok().getLoc(); if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { - Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E)); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + if (Res != MatchOperand_Success) + return Res; + switch (LaneKind) { + default: + assert(0 && "unexpected lane kind!"); + case NoLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E)); + break; + case AllLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1, + LaneIndex, S,E)); + break; + } return MatchOperand_Success; } if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); - Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, S, E)); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + if (Res != MatchOperand_Success) + return Res; + switch (LaneKind) { + default: + assert(0 && "unexpected lane kind!"); + case NoLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); + break; + case AllLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2, + LaneIndex, S,E)); + break; + } return MatchOperand_Success; } Error(S, "vector register expected"); @@ -2518,18 +2832,30 @@
parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } unsigned Count = 1; + int Spacing = 0; unsigned FirstReg = Reg; // The list is of D registers, but we also allow Q regs and just interpret // them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { FirstReg = Reg = getDRegFromQReg(Reg); + Spacing = 1; // double-spacing requires explicit D registers, otherwise + // it's ambiguous with four-register single spaced. ++Reg; ++Count; } + if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; while (Parser.getTok().is(AsmToken::Comma) || Parser.getTok().is(AsmToken::Minus)) { if (Parser.getTok().is(AsmToken::Minus)) { + if (!Spacing) + Spacing = 1; // Register range implies a single spaced list. + else if (Spacing == 2) { + Error(Parser.getTok().getLoc(), + "sequential registers in double-spaced list"); + return MatchOperand_ParseFail; + } Parser.Lex(); // Eat the minus. SMLoc EndLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); @@ -2554,6 +2880,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(EndLoc, "bad range in register list"); return MatchOperand_ParseFail; } + // Parse the lane specifier if present. + VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } + EndLoc = Parser.getTok().getLoc(); // Add all the registers in the range to the register list. Count += EndReg - Reg; @@ -2575,6 +2911,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // The list is of D registers, but we also allow Q regs and just interpret // them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + if (!Spacing) + Spacing = 1; // Register range implies a single spaced list. + else if (Spacing == 2) { + Error(RegLoc, + "invalid register in double-spaced list (must be 'D' register)"); + return MatchOperand_ParseFail; + } Reg = getDRegFromQReg(Reg); if (Reg != OldReg + 1) { Error(RegLoc, "non-contiguous register range"); return MatchOperand_ParseFail; } ++Reg; Count += 2; + // Parse the lane specifier if present. + VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + SMLoc EndLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } continue; } - // Normal D register. Just check that it's contiguous and keep going. - if (Reg != OldReg + 1) { + // Normal D register. + // Figure out the register spacing (single or double) of the list if + // we don't know it already. + if (!Spacing) + Spacing = 1 + (Reg == OldReg + 2); + + // Just check that it's contiguous and keep going. + if (Reg != OldReg + Spacing) { Error(RegLoc, "non-contiguous register range"); return MatchOperand_ParseFail; } ++Count; + // Parse the lane specifier if present.
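// (Editorial sketch, not part of the patch: every element of a vector list
// must carry the same lane suffix. parseVectorLane() distinguishes three
// forms -- "d2" -> NoLanes, "d2[]" -> AllLanes, "d2[1]" -> IndexedLane -- and
// the NextLaneKind/NextLaneIndex comparison below rejects mixtures such as
// "{d0[1], d1[2]}". The 'Spacing = 1 + (Reg == OldReg + 2)' above infers
// single vs. double spacing from the gap between the first two D registers,
// i.e. {d0, d1, ...} vs. {d0, d2, ...}.)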
+ VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + SMLoc EndLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } + if (Spacing == 2 && LaneKind != NoLanes) { + Error(EndLoc, + "lane index specifier invalid in double-spaced register list"); + return MatchOperand_ParseFail; + } } SMLoc E = Parser.getTok().getLoc(); @@ -2599,7 +2973,22 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } Parser.Lex(); // Eat '}' token. - Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, S, E)); + switch (LaneKind) { + default: + assert(0 && "unexpected lane kind in register list."); + case NoLanes: + Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, + (Spacing == 2), S, E)); + break; + case AllLanes: + Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count, + S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count, + LaneIndex, S, E)); + break; + } return MatchOperand_Success; } @@ -2786,7 +3175,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, Parser.Lex(); // Eat shift type token. // There must be a '#' and a shift amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2864,7 +3254,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the operator. // A '#' and a shift amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2924,7 +3315,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the operator. // A '#' and a rotate amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2961,7 +3353,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); // The bitfield descriptor is really two operands, the LSB and the width. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2993,7 +3386,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat hash token. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -3087,7 +3481,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Tok.getLoc(); // Do immediates first, as we always parse those if we have a '#'.
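// (Editorial note, not part of the patch: the recurring Hash-or-Dollar pairs
// in the hunks above and below let '$' introduce an immediate anywhere '#'
// does, so e.g. "mov r0, $1" is accepted as "mov r0, #1".)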
- if (Parser.getTok().is(AsmToken::Hash)) { + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat the '#'. // Explicitly look for a '-', as we need to encode negative zero // differently. @@ -3444,7 +3839,7 @@ bool ARMAsmParser:: cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); // Vn @@ -3458,7 +3853,7 @@ bool ARMAsmParser:: cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); // Vn @@ -3478,7 +3873,7 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, // Vn ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); // Vt - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; @@ -3494,7 +3889,7 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, // Vm ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); // Vt - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; @@ -3591,8 +3986,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // offset. Be friendly and also accept a plain integer (without a leading // hash) for gas compatibility. if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar) || Parser.getTok().is(AsmToken::Integer)) { - if (Parser.getTok().is(AsmToken::Hash)) + if (Parser.getTok().isNot(AsmToken::Integer)) Parser.Lex(); // Eat the '#'. E = Parser.getTok().getLoc(); @@ -3690,7 +4086,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, if (Tok.isNot(AsmToken::Identifier)) return true; StringRef ShiftName = Tok.getString(); - if (ShiftName == "lsl" || ShiftName == "LSL") + if (ShiftName == "lsl" || ShiftName == "LSL" || + ShiftName == "asl" || ShiftName == "ASL") St = ARM_AM::lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") St = ARM_AM::lsr; @@ -3710,7 +4107,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, Loc = Parser.getTok().getLoc(); // A '#' and a shift amount. const AsmToken &HashTok = Parser.getTok(); - if (HashTok.isNot(AsmToken::Hash)) + if (HashTok.isNot(AsmToken::Hash) && + HashTok.isNot(AsmToken::Dollar)) return Error(HashTok.getLoc(), "'#' expected"); Parser.Lex(); // Eat hash token. @@ -3739,7 +4137,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::Hash)) + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; // Disambiguate the VMOV forms that can accept an FP immediate. 
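// (Editorial note, not part of the patch: "asl"/"ASL" is accepted as a
// synonym for "lsl" here and in tryParseShiftRegister above; ARM has no
// distinct arithmetic left shift, so both spellings map to ARM_AM::lsl.)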
@@ -3852,6 +4251,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return parseMemory(Operands); case AsmToken::LCurly: return parseRegisterList(Operands); + case AsmToken::Dollar: case AsmToken::Hash: { // #42 -> immediate. // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate @@ -3990,7 +4390,9 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" || - Mnemonic == "vrsqrts" || Mnemonic == "srs" || + Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" || + Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || + Mnemonic == "fsts" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4206,9 +4608,27 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } +static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features); /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Apply mnemonic aliases before doing anything else, as the destination + // mnemonic may include suffixes and we want to handle them normally. + // The generic tblgen'erated code does this later, at the start of + // MatchInstructionImpl(), but that's too late for aliases that include + // any sort of suffix. + unsigned AvailableFeatures = getAvailableFeatures(); + applyMnemonicAliases(Name, AvailableFeatures); + + // First check for the ARM-specific .req directive. + if (Parser.getTok().is(AsmToken::Identifier) && + Parser.getTok().getIdentifier() == ".req") { + parseDirectiveReq(Name, NameLoc); + // We always return 'error' for this, as we're done with this + // statement and don't need to match the instruction. + return true; + } + // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); StringRef Mnemonic = Name.slice(Start, Next); @@ -4400,12 +4820,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, } } // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the - // end. Convert it to a token here. + // end. Convert it to a token here. Take care not to convert those + // that should hit the Thumb2 encoding. if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && static_cast<ARMOperand*>(Operands[5])->isImm()) { ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0) { + if (CE && CE->getValue() == 0 && + (isThumbOne() || + // The cc_out operand matches the IT block. + ((inITBlock() != CarrySetting) && + // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && + isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){ Operands.erase(Operands.begin() + 5); Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); delete Op; @@ -4605,11 +5034,495 @@ validateInstruction(MCInst &Inst, return false; } +static unsigned getRealVSTLNOpcode(unsigned Opc) { + switch(Opc) { + default: assert(0 && "unexpected opcode!"); + // VST1LN + case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: + case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: + case ARM::VST1LNdWB_fixed_Asm_U8: + return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16: + case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16: + case ARM::VST1LNdWB_fixed_Asm_U16: + return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: + case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: + case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: + return ARM::VST1LNd32_UPD; + case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: + case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: + case ARM::VST1LNdWB_register_Asm_U8: + return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16: + case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16: + case ARM::VST1LNdWB_register_Asm_U16: + return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: + case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: + case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: + return ARM::VST1LNd32_UPD; + case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: + case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8: + case ARM::VST1LNdAsm_U8: + return ARM::VST1LNd8; + case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16: + case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: + case ARM::VST1LNdAsm_U16: + return ARM::VST1LNd16; + case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: + case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: + case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32: + return ARM::VST1LNd32; + + // VST2LN + case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: + case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: + case ARM::VST2LNdWB_fixed_Asm_U8: + return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16: + case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16: + case ARM::VST2LNdWB_fixed_Asm_U16: + return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: + case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: + case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: + return ARM::VST2LNd32_UPD; + case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: + case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: + case ARM::VST2LNdWB_register_Asm_U8: + return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16: + case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16: + case ARM::VST2LNdWB_register_Asm_U16: + return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: + case ARM::VST2LNdWB_register_Asm_F32: case 
ARM::VST2LNdWB_register_Asm_I32: + case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: + return ARM::VST2LNd32_UPD; + case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: + case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8: + case ARM::VST2LNdAsm_U8: + return ARM::VST2LNd8; + case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16: + case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: + case ARM::VST2LNdAsm_U16: + return ARM::VST2LNd16; + case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: + case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: + case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32: + return ARM::VST2LNd32; + } +} + +static unsigned getRealVLDLNOpcode(unsigned Opc) { + switch(Opc) { + default: assert(0 && "unexpected opcode!"); + // VLD1LN + case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: + case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: + case ARM::VLD1LNdWB_fixed_Asm_U8: + return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16: + case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16: + case ARM::VLD1LNdWB_fixed_Asm_U16: + return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: + case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: + case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: + return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: + case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: + case ARM::VLD1LNdWB_register_Asm_U8: + return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16: + case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16: + case ARM::VLD1LNdWB_register_Asm_U16: + return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: + case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: + case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: + return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: + case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8: + case ARM::VLD1LNdAsm_U8: + return ARM::VLD1LNd8; + case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16: + case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: + case ARM::VLD1LNdAsm_U16: + return ARM::VLD1LNd16; + case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: + case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: + case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32: + return ARM::VLD1LNd32; + + // VLD2LN + case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: + case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: + case ARM::VLD2LNdWB_fixed_Asm_U8: + return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16: + case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16: + case ARM::VLD2LNdWB_fixed_Asm_U16: + return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: + case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: + case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: + return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: + case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: + case ARM::VLD2LNdWB_register_Asm_U8: + return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_register_Asm_16: 
case ARM::VLD2LNdWB_register_Asm_P16: + case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16: + case ARM::VLD2LNdWB_register_Asm_U16: + return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: + case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: + case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: + return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: + case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8: + case ARM::VLD2LNdAsm_U8: + return ARM::VLD2LNd8; + case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16: + case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: + case ARM::VLD2LNdAsm_U16: + return ARM::VLD2LNd16; + case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: + case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: + case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32: + return ARM::VLD2LNd32; + } +} + bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { - // Handle the MOV complex aliases. + // Handle NEON VST complex aliases. + case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: + case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: + case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16: + case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16: + case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16: + case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: + case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: + case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: + case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: + case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16: + case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16: + case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16: + case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: + case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32: + case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
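// (Editorial sketch, not part of the patch: the _Asm pseudo was matched with
// operands in source order -- Vd list, lane, Rn, alignment, Rm, predicate --
// whereas the real instruction wants the address operands first. Re-adding
// operand 2 twice below models the tied Rn_wb/Rn write-back pair of the
// _UPD form.)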
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: + case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: + case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16: + case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16: + case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16: + case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: + case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: + case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: + case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: + case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16: + case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16: + case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16: + case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: + case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: + case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8: + case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16: + case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: + case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: + case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32: + case ARM::VST1LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8: + case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16: + case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: + case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: + case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32: + case ARM::VST2LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle NEON VLD complex aliases. + case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: + case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: + case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16: + case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16: + case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16: + case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: + case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: + case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: + case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: + case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16: + case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16: + case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16: + case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: + case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: + case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: + case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: + case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16: + case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16: + case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16: + case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: + case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: + case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: + case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: + case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16: + case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16: + case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16: + case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: + case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: + case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8: + case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16: + case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: + case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: + case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32: + case ARM::VLD1LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8: + case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16: + case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: + case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: + case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32: + case ARM::VLD2LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle the Thumb2 mode MOV complex aliases. + case ARM::t2MOVsi: + case ARM::t2MOVSsi: { + // Which instruction to expand to depends on the CCOut operand and + // whether we're in an IT block if the register operands are low + // registers. + bool isNarrow = false; + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg()) && + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi)) + isNarrow = true; + MCInst TmpInst; + unsigned newOpc; + switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) { + default: llvm_unreachable("unexpected opcode!"); + case ARM_AM::asr: newOpc = isNarrow ? 
ARM::tASRri : ARM::t2ASRri; break; + case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break; + case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break; + case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; + } + unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()); + if (Amount == 32) Amount = 0; + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + if (isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateImm(Amount)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + if (!isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); + Inst = TmpInst; + return true; + } + // Handle the ARM mode MOV complex aliases. case ARM::ASRr: case ARM::LSRr: case ARM::LSLr: @@ -4743,6 +5656,24 @@ processInstruction(MCInst &Inst, Inst = TmpInst; } break; + case ARM::t2ADDri12: + // If the immediate fits for encoding T3 (t2ADDri) and the generic "add" + // mnemonic was used (not "addw"), encoding T3 is preferred. + if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" || + ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) + break; + Inst.setOpcode(ARM::t2ADDri); + Inst.addOperand(MCOperand::CreateReg(0)); // cc_out + break; + case ARM::t2SUBri12: + // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub" + // mnemonic was used (not "subw"), encoding T3 is preferred. + if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" || + ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) + break; + Inst.setOpcode(ARM::t2SUBri); + Inst.addOperand(MCOperand::CreateReg(0)); // cc_out + break; case ARM::tADDi8: // If the immediate is in the range 0-7, we want tADDi3 iff Rd was // explicitly specified. From the ARM ARM: "Encoding T1 is preferred @@ -4763,6 +5694,26 @@ processInstruction(MCInst &Inst, return true; } break; + case ARM::t2ADDrr: { + // If the destination and first source operand are the same, and + // there's no setting of the flags, use encoding T2 instead of T3. + // Note that this is only for ADD, not SUB. This mirrors the system + // 'as' behaviour. Make sure the wide encoding wasn't explicit. + if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + Inst.getOperand(5).getReg() != 0 || + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + break; + MCInst TmpInst; + TmpInst.setOpcode(ARM::tADDhirr); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } case ARM::tB: // A Thumb conditional branch outside of an IT block is a tBcc.
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) { @@ -5079,12 +6030,16 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveWord(4, DirectiveID.getLoc()); else if (IDVal == ".thumb") return parseDirectiveThumb(DirectiveID.getLoc()); + else if (IDVal == ".arm") + return parseDirectiveARM(DirectiveID.getLoc()); else if (IDVal == ".thumb_func") return parseDirectiveThumbFunc(DirectiveID.getLoc()); else if (IDVal == ".code") return parseDirectiveCode(DirectiveID.getLoc()); else if (IDVal == ".syntax") return parseDirectiveSyntax(DirectiveID.getLoc()); + else if (IDVal == ".unreq") + return parseDirectiveUnreq(DirectiveID.getLoc()); return true; } @@ -5120,9 +6075,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { return Error(L, "unexpected token in directive"); Parser.Lex(); - // TODO: set thumb mode - // TODO: tell the MC streamer the mode - // getParser().getStreamer().Emit???(); + if (!isThumb()) + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); + return false; +} + +/// parseDirectiveARM +/// ::= .arm +bool ARMAsmParser::parseDirectiveARM(SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + + if (isThumb()) + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); return false; } @@ -5212,6 +6180,45 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { return false; } +/// parseDirectiveReq +/// ::= name .req registername +bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { + Parser.Lex(); // Eat the '.req' token. + unsigned Reg; + SMLoc SRegLoc, ERegLoc; + if (ParseRegister(Reg, SRegLoc, ERegLoc)) { + Parser.EatToEndOfStatement(); + return Error(SRegLoc, "register name expected"); + } + + // Shouldn't be anything else. + if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { + Parser.EatToEndOfStatement(); + return Error(Parser.getTok().getLoc(), + "unexpected input in .req directive."); + } + + Parser.Lex(); // Consume the EndOfStatement + + if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) + return Error(SRegLoc, "redefinition of '" + Name + + "' does not match original."); + + return false; +} + +/// parseDirectiveUnreq +/// ::= .unreq registername +bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Parser.EatToEndOfStatement(); + return Error(L, "unexpected input in .unreq directive."); + } + RegisterReqs.erase(Parser.getTok().getIdentifier()); + Parser.Lex(); // Eat the identifier. + return false; +} + extern "C" void LLVMInitializeARMAsmLexer(); /// Force static initialization.
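All of the VLD/VST lane-alias cases above share one rewrite shape: build a fresh MCInst for the real opcode returned by getRealVSTLNOpcode()/getRealVLDLNOpcode() and re-add the parsed operands in the order the encoder expects. A minimal standalone sketch of that pattern, using the operand order of the plain VST1LNdAsm case (the helper name is hypothetical, not part of the patch):

    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    // Rewrite a parsed single-lane VST1 alias into the real instruction.
    // Parsed order:  Vd, lane, Rn, alignment, cond-code, pred-reg.
    // Encoder order: Rn, alignment, Vd, lane, cond-code, pred-reg.
    static void rewriteVST1LaneAlias(MCInst &Inst, unsigned RealOpcode) {
      MCInst Tmp;
      Tmp.setOpcode(RealOpcode);           // e.g. from getRealVSTLNOpcode()
      Tmp.addOperand(Inst.getOperand(2));  // Rn
      Tmp.addOperand(Inst.getOperand(3));  // alignment
      Tmp.addOperand(Inst.getOperand(0));  // Vd
      Tmp.addOperand(Inst.getOperand(1));  // lane
      Tmp.addOperand(Inst.getOperand(4));  // condition code
      Tmp.addOperand(Inst.getOperand(5));  // predicate register
      Inst = Tmp;
    }

The writeback forms additionally emit Rn_wb first and an Rm after the alignment (a zero register for the fixed-offset variants), and the VLD versions add the tied source operand, but the shuffle itself is identical.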
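The .req machinery above simply records Name -> Reg in the parser's RegisterReqs string map, and register-operand parsing consults that map before the architectural names. A minimal sketch of the lookup side (free-standing and hypothetical; the real check lives inside the parser's register-parsing code):

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    // Return the register an alias maps to, or 0 (not a valid register)
    // so the caller can fall back to the normal register-name lookup.
    static unsigned lookupRegisterAlias(StringRef Name,
                                        const StringMap<unsigned> &RegisterReqs) {
      StringMap<unsigned>::const_iterator I = RegisterReqs.find(Name);
      return I == RegisterReqs.end() ? 0 : I->getValue();
    }

So after "fp .req r11", an operand spelled fp resolves to r11 until ".unreq fp" erases the entry; parseDirectiveReq rejects a redefinition that maps the same name to a different register.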
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index 3f5ad39..e24a1b1 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser ) add_dependencies(LLVMARMAsmParser ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMAsmParser - LLVMARMDesc - LLVMARMInfo - LLVMMC - LLVMMCParser - LLVMSupport - ) diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt index cbf9b4b..f0184b6 100644 --- a/lib/Target/ARM/AsmParser/LLVMBuild.txt +++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMAsmParser parent = ARM required_libraries = ARMDesc ARMInfo MC MCParser Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 511932e..04cdf55 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -48,20 +48,6 @@ add_llvm_target(ARMCodeGen Thumb2SizeReduction.cpp ) -add_llvm_library_dependencies(LLVMARMCodeGen - LLVMARMAsmPrinter - LLVMARMDesc - LLVMARMInfo - LLVMAnalysis - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - # workaround for hanging compilation on MSVC9, 10 if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 ) set_property( diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ad250ab..49c64fd 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2085,15 +2085,24 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD1d32Qwb_register: case ARM::VLD1d64Qwb_fixed: case ARM::VLD1d64Qwb_register: - case ARM::VLD2d8_UPD: - case ARM::VLD2d16_UPD: - case ARM::VLD2d32_UPD: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: - case ARM::VLD2b8_UPD: - case ARM::VLD2b16_UPD: - case ARM::VLD2b32_UPD: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: + case ARM::VLD2q8wb_register: + case ARM::VLD2q16wb_register: + case ARM::VLD2q32wb_register: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2b8wb_register: + case ARM::VLD2b16wb_register: + case ARM::VLD2b32wb_register: case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: @@ -2196,23 +2205,40 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST1q16wb_register: case ARM::VST1q32wb_register: case ARM::VST1q64wb_register: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2d8_UPD: - case ARM::VST2d16_UPD: - case ARM::VST2d32_UPD: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: - case ARM::VST2b8_UPD: - case ARM::VST2b16_UPD: - case ARM::VST2b32_UPD: + case ARM::VST1d8Twb_fixed: + case ARM::VST1d16Twb_fixed: + case ARM::VST1d32Twb_fixed: + case ARM::VST1d64Twb_fixed: + case ARM::VST1d8Twb_register: + case ARM::VST1d16Twb_register: + case ARM::VST1d32Twb_register: + case ARM::VST1d64Twb_register: + case ARM::VST1d8Qwb_fixed: + case 
ARM::VST1d16Qwb_fixed: + case ARM::VST1d32Qwb_fixed: + case ARM::VST1d64Qwb_fixed: + case ARM::VST1d8Qwb_register: + case ARM::VST1d16Qwb_register: + case ARM::VST1d32Qwb_register: + case ARM::VST1d64Qwb_register: + case ARM::VST2d8wb_fixed: + case ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2d8wb_register: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_register: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2q8wb_register: + case ARM::VST2q16wb_register: + case ARM::VST2q32wb_register: + case ARM::VST2b8wb_fixed: + case ARM::VST2b16wb_fixed: + case ARM::VST2b32wb_fixed: + case ARM::VST2b8wb_register: + case ARM::VST2b16wb_register: + case ARM::VST2b32wb_register: case ARM::VST3d8_UPD: case ARM::VST3d16_UPD: case ARM::VST3d32_UPD: @@ -2264,34 +2290,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Second input register switch (Inst.getOpcode()) { - case ARM::VST1d8T: - case ARM::VST1d16T: - case ARM::VST1d32T: - case ARM::VST1d64T: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2d8: - case ARM::VST2d16: - case ARM::VST2d32: - case ARM::VST2d8_UPD: - case ARM::VST2d16_UPD: - case ARM::VST2d32_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: @@ -2307,12 +2305,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) return MCDisassembler::Fail; break; - case ARM::VST2b8: - case ARM::VST2b16: - case ARM::VST2b32: - case ARM::VST2b8_UPD: - case ARM::VST2b16_UPD: - case ARM::VST2b32_UPD: case ARM::VST3q8: case ARM::VST3q16: case ARM::VST3q32: @@ -2334,28 +2326,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Third input register switch (Inst.getOpcode()) { - case ARM::VST1d8T: - case ARM::VST1d16T: - case ARM::VST1d32T: - case ARM::VST1d64T: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: @@ -2392,20 +2362,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Fourth input register switch (Inst.getOpcode()) { - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST4d8: case ARM::VST4d16: case ARM::VST4d32: @@ -2441,16 +2397,11 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); 
unsigned size = fieldFromInstruction32(Insn, 6, 2); - unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1; align *= (1 << size); if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; - if (regs == 2) { - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) - return MCDisassembler::Fail; - } if (Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -2460,12 +2411,12 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) - return MCDisassembler::Fail; - } + // The fixed offset post-increment encodes Rm == 0xd. The no-writeback + // variant encodes Rm == 0xf. Anything else is a register offset post- + // increment and we need to add the register operand to the instruction. + if (Rm != 0xD && Rm != 0xF && + !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; return S; } @@ -2693,7 +2644,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; unsigned op = fieldFromInstruction32(Insn, 6, 1); - unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1; if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; @@ -2702,10 +2652,8 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // Writeback } - for (unsigned i = 0; i < length; ++i) { - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder))) + if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; - } if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; @@ -4138,4 +4086,3 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, return S; } - diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt index da87751..9de6e5c 100644 --- a/lib/Target/ARM/Disassembler/CMakeLists.txt +++ b/lib/Target/ARM/Disassembler/CMakeLists.txt @@ -11,11 +11,3 @@ set_property( ) endif() add_dependencies(LLVMARMDisassembler ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMDisassembler - LLVMARMCodeGen - LLVMARMDesc - LLVMARMInfo - LLVMMC - LLVMSupport - ) diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index baa9bc3..94075a9 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMDisassembler parent = ARM required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 6c6c021..662097a 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1029,3 +1029,29 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}"; } + +void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream 
&O) { + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}"; +} + +void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}"; +} + +void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; +} + diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 3f38f1a..05db2d2 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -133,6 +133,12 @@ public: void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); }; } // end namespace llvm diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt index fa0b495..e2d4819 100644 --- a/lib/Target/ARM/InstPrinter/CMakeLists.txt +++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt @@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter ) add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMAsmPrinter - LLVMMC - LLVMSupport - ) diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt index b34aab4..6f4fa36 100644 --- a/lib/Target/ARM/InstPrinter/LLVMBuild.txt +++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMAsmPrinter parent = ARM required_libraries = MC Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt index 9082539..fd4b3a3 100644 --- a/lib/Target/ARM/LLVMBuild.txt +++ b/lib/Target/ARM/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = ARM @@ -30,4 +33,3 @@ name = ARMCodeGen parent = ARM required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target add_to_library_groups = ARM - diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 62d04c4..bf1f0e8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -102,6 +102,11 @@ public: bool MayNeedRelaxation(const MCInst &Inst) const; + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) 
const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; @@ -124,14 +129,49 @@ public: }; } // end anonymous namespace +static unsigned getRelaxedOpcode(unsigned Op) { + switch (Op) { + default: return Op; + case ARM::tBcc: return ARM::t2Bcc; + } +} + bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { - // FIXME: Thumb targets, different move constant targets.. + if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()) + return true; return false; } +bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the + // low bit being an implied zero. There's an implied +4 offset for the + // branch, so we adjust the other way here to determine what's + // encodable. + // + // Relax if the value is too big for a (signed) i8. + int64_t Offset = int64_t(Value) - 4; + return Offset > 254 || Offset < -256; +} + void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { - assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented"); - return; + unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); + + // Sanity check w/ diagnostic if we get here w/ a bogus instruction. + if (RelaxedOp == Inst.getOpcode()) { + SmallString<256> Tmp; + raw_svector_ostream OS(Tmp); + Inst.dump_pretty(OS); + OS << "\n"; + report_fatal_error("unexpected instruction to relax: " + OS.str()); + } + + // The instructions we're relaxing have (so far) the same operands. + // We just need to update to the proper opcode. + Res = Inst; + Res.setOpcode(RelaxedOp); } bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 865c3e2..c38a882 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1412,7 +1412,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return MO.getReg(); + return getARMRegisterNumbering(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 352c73e..f394b4f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" @@ -178,9 +179,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: MovtBit = 1; + // The thumb bit shouldn't be set in the 'other-half' bit of the + // relocation, but it will be set in FixedValue if the base symbol + // is a thumb function. Clear it out here. 
+ if (A_SD->getFlags() & SF_ThumbFunc) + FixedValue &= 0xfffffffe; break; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: + if (A_SD->getFlags() & SF_ThumbFunc) + FixedValue &= 0xfffffffe; MovtBit = 1; // Fallthrough case ARM::fixup_t2_movw_lo16: @@ -189,7 +197,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, break; } - if (Type == macho::RIT_ARM_HalfDifference) { uint32_t OtherHalf = MovtBit ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index f529314..f2cf78a 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -10,10 +10,3 @@ add_dependencies(LLVMARMDesc ARMCommonTableGen) # Hack: we need to include 'main' target directory to grab private headers include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) - -add_llvm_library_dependencies(LLVMARMDesc - LLVMARMAsmPrinter - LLVMARMInfo - LLVMMC - LLVMSupport - ) diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt index 46b11c7..2a7fe61 100644 --- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMDesc parent = ARM required_libraries = ARMAsmPrinter ARMInfo MC Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 2df0053..000a37f 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { // FIXME: Detect integer instructions properly. const MCInstrDesc &MCID = MI->getDesc(); unsigned Domain = MCID.TSFlags & ARMII::DomainMask; - if (MCID.mayStore()) + if (MI->mayStore()) return false; unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) @@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, const MCInstrDesc &MCID2 = TII->get(AddSubOpc); unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI)); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg) + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) .addReg(Src2Reg, getKillRegState(Src2Kill)); if (HasLane) MIB.addImm(LaneImm); MIB.addImm(Pred).addReg(PredReg); - MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2) + MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); if (NegAcc) { @@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.isBarrier()) { + if (MI->isBarrier()) { clearStack(); Skip = 0; ++MII; diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt index 8b38b13..533e747 100644 --- a/lib/Target/ARM/TargetInfo/CMakeLists.txt +++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt @@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo ) add_dependencies(LLVMARMInfo ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMInfo - LLVMMC - LLVMSupport - LLVMTarget - ) diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt index 046c1fc..a07a940 100644 --- a/lib/Target/ARM/TargetInfo/LLVMBuild.txt +++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt @@ -21,4 
+21,3 @@ name = ARMInfo parent = ARM required_libraries = MC Support Target add_to_library_groups = ARM - diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index e8ed482..e61c0a7 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -643,14 +643,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(Offset && "This code isn't needed if offset already handled!"); unsigned Opcode = MI.getOpcode(); - const MCInstrDesc &Desc = MI.getDesc(); // Remove predicate first. int PIdx = MI.findFirstPredOperandIdx(); if (PIdx != -1) removeOperands(MI, PIdx); - if (Desc.mayLoad()) { + if (MI.mayLoad()) { // Use the destination register to materialize sp + offset. unsigned TmpReg = MI.getOperand(0).getReg(); bool UseRR = false; @@ -673,7 +672,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); - } else if (Desc.mayStore()) { + } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); bool UseRR = false; @@ -699,7 +698,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Add predicate back if it's needed. - if (MI.getDesc().isPredicable()) { + if (MI.isPredicable()) { MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index b627400..55b4d30 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -13,6 +13,7 @@ #include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, // rsb r2, 0 // const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.hasOptionalDef() && + if (MI->hasOptionalDef() && MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR) return false; @@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. for (; MBBI != E && Pos && - (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) { + (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) { if (MBBI->isDebugValue()) continue; @@ -237,6 +238,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { // Last instruction in IT block kills ITSTATE. LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); + // Finalize the bundle. + FinalizeBundle(MBB, InsertPos.getInstrIterator(), LastITMI); + Modified = true; ++NumITs; } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index e5fc8b4..e206288 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -452,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit load / store instruction. 
DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)); MIB.addOperand(MI->getOperand(1)); @@ -478,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumLdSts; return true; } @@ -513,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) return false; - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(ARM::tADDrSPi)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) @@ -525,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumNarrows; return true; } @@ -533,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayLoad() || MCID.mayStore()) + if (MI->mayLoad() || MI->mayStore()) return ReduceLoadStore(MBB, MI, Entry); switch (Opc) { @@ -654,7 +653,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) @@ -678,7 +677,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++Num2Addrs; return true; } @@ -745,7 +744,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) @@ -785,7 +784,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumNarrows; return true; } @@ -830,16 +829,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Yes, CPSR could be livein. bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineInstr *CPSRDef = 0; + MachineInstr *BundleMI = 0; // If this BB loops back to itself, conservatively avoid narrowing the // first instruction that does partial flag update. 
bool IsSelfLoop = MBB.isSuccessor(&MBB); - MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); - MachineBasicBlock::iterator NextMII; + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end(); + MachineBasicBlock::instr_iterator NextMII; for (; MII != E; MII = NextMII) { NextMII = llvm::next(MII); MachineInstr *MI = &*MII; + if (MI->isBundle()) { + BundleMI = MI; + continue; + } + LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); unsigned Opcode = MI->getOpcode(); @@ -850,7 +855,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { if (Entry.Special) { if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } goto ProcessNext; @@ -860,7 +865,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; goto ProcessNext; } @@ -869,15 +874,24 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } } ProcessNext: + if (LiveCPSR && + NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle() && + BundleMI->killsRegister(ARM::CPSR)) + // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill + // marker is only on the BUNDLE instruction. Process the BUNDLE + // instruction as we finish with the bundled instruction to work around + // the inconsistency. + LiveCPSR = false; + bool DefCPSR = false; LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); - if (MI->getDesc().isCall()) { + if (MI->isCall()) { // Calls don't really set CPSR. CPSRDef = 0; IsSelfLoop = false;
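The fixupNeedsRelaxation() test added in ARMAsmBackend.cpp above boils down to a single range check; restated as a self-contained helper (hypothetical name, arithmetic copied from the patch):

    #include <cstdint>

    // tBcc encodes a signed 9-bit, halfword-aligned displacement measured
    // from PC, i.e. the branch address + 4. Undo the implied +4, then
    // relax to the 32-bit t2Bcc once the offset leaves [-256, 254].
    static bool thumbBccNeedsRelaxation(uint64_t Value) {
      int64_t Offset = static_cast<int64_t>(Value) - 4;
      return Offset > 254 || Offset < -256;
    }

For example, a forward branch whose resolved fixup value is 260 gives Offset 256 and must be relaxed, which RelaxInstruction() does by swapping the opcode via getRelaxedOpcode() while keeping the operands unchanged.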
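Likewise, ReduceMBB() above now walks instr_begin()/instr_end() so it visits the instructions inside IT-block bundles individually; the BUNDLE header is skipped but remembered because, per the FIXME, the post-RA scheduler leaves the CPSR kill marker on the header. A stripped-down sketch of that traversal (the reduction logic is elided; this is an illustration, not the pass itself):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    static void walkInstrsWithBundles(MachineBasicBlock &MBB) {
      MachineInstr *BundleMI = 0;
      for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
                                             E = MBB.instr_end(); I != E; ++I) {
        if (I->isBundle()) {
          BundleMI = &*I;  // header carries bundle-wide flags, e.g. CPSR kill
          continue;
        }
        // ... examine/narrow the individual instruction here; consult
        // BundleMI->killsRegister(ARM::CPSR) when leaving a bundle ...
      }
      (void)BundleMI;
    }

Erasing while iterating at this level is also why the reductions switched from MBB.erase(MI) to MBB.erase_instr(MI): the instruction must be unlinked from the instruction-level list, not just the top-level bundle list.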