Diffstat (limited to 'lib/Target')
319 files changed, 28184 insertions, 4263 deletions
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index bbca228..6ae287a 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -493,11 +493,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
       return false;
     }
 
-    // These modifiers are not yet supported.
-    case 'p': // The high single-precision register of a VFP double-precision
-              // register.
     case 'e': // The low doubleword register of a NEON quad register.
-    case 'f': // The high doubleword register of a NEON quad register.
+    case 'f': { // The high doubleword register of a NEON quad register.
+      if (!MI->getOperand(OpNum).isReg())
+        return true;
+      unsigned Reg = MI->getOperand(OpNum).getReg();
+      if (!ARM::QPRRegClass.contains(Reg))
+        return true;
+      const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+      unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+                                       ARM::dsub_0 : ARM::dsub_1);
+      O << ARMInstPrinter::getRegisterName(SubReg);
+      return false;
+    }
+
+    // These modifiers are not yet supported.
     case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
     case 'H': // The highest-numbered register of a pair.
       return true;
@@ -739,14 +749,14 @@ void ARMAsmPrinter::emitAttributes() {
   }
 
   // Signal various FP modes.
-  if (!UnsafeFPMath) {
+  if (!TM.Options.UnsafeFPMath) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
                                ARMBuildAttrs::Allowed);
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
                                ARMBuildAttrs::Allowed);
   }
 
-  if (NoInfsFPMath && NoNaNsFPMath)
+  if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
                                ARMBuildAttrs::Allowed);
   else
@@ -759,7 +769,7 @@ void ARMAsmPrinter::emitAttributes() {
   AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
 
   // Hard float. Use both S and D registers and conform to AAPCS-VFP.
-  if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+  if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
   }
@@ -1069,7 +1079,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
   }
 
   // Try to figure out the unwinding opcode out of src / dst regs.
-  if (MI->getDesc().mayStore()) {
+  if (MI->mayStore()) {
     // Register saves.
     assert(DstReg == ARM::SP &&
            "Only stack pointer as a destination reg is supported");
@@ -1481,11 +1491,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     /// in the function. The first operand is the ID# for this instruction, the
     /// second is the index into the MachineConstantPool that this is, the third
     /// is the size in bytes of this constant pool entry.
+    /// The required alignment is specified on the basic block holding this MI.
     unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
     unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
 
-    EmitAlignment(2);
-
     // Mark the constant pool entry as data if we're not already in a data
     // region.
OutStreamer.EmitDataRegion(); @@ -1934,4 +1943,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() { RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget); RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget); } - diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9315348..8bf5475 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -146,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - bool isLoad = !MCID.mayStore(); + bool isLoad = !MI->mayStore(); const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); const MachineOperand &Base = MI->getOperand(2); const MachineOperand &Offset = MI->getOperand(NumOps-3); @@ -439,6 +439,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } +bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { + if (MI->isBundle()) { + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + int PIdx = I->findFirstPredOperandIdx(); + if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) + return true; + } + return false; + } + + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; +} + bool ARMBaseInstrInfo:: PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { @@ -491,7 +507,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { // FIXME: This confuses implicit_def with optional CPSR def. const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef()) + if (!MCID.getImplicitDefs() && !MI->hasOptionalDef()) return false; bool Found = false; @@ -510,11 +526,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, /// By default, this returns true for every instruction with a /// PredicateOperand. bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return false; - if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { + if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { ARMFunctionInfo *AFI = MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); return AFI->isThumb2Function(); @@ -548,7 +563,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); if (MI->isLabel()) return 0; - unsigned Opc = MI->getOpcode(); + unsigned Opc = MI->getOpcode(); switch (Opc) { case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: @@ -556,6 +571,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::EH_LABEL: case TargetOpcode::DBG_VALUE: return 0; + case TargetOpcode::BUNDLE: + return getInstBundleLength(MI); case ARM::MOVi16_ga_pcrel: case ARM::MOVTi16_ga_pcrel: case ARM::t2MOVi16_ga_pcrel: @@ -593,7 +610,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); unsigned NumOps = MCID.getNumOperands(); MachineOperand JTOP = - MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2)); + MI->getOperand(NumOps - (MI->isPredicable() ? 
3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); assert(MJTI != 0); @@ -622,6 +639,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 0; // Not reached } +unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { + unsigned Size = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + assert(!I->isBundle() && "No nested bundle!"); + Size += GetInstSizeInBytes(&*I); + } + return Size; +} + void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -845,7 +873,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const { const MachineMemOperand *Dummy; - return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); + return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); } void ARMBaseInstrInfo:: @@ -991,7 +1019,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const { const MachineMemOperand *Dummy; - return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); + return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ @@ -1357,7 +1385,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isLabel()) return true; // Treat the start of the IT block as a scheduling boundary, but schedule @@ -1762,8 +1790,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Check that CPSR isn't set between the comparison instruction and the one we // want to change. - MachineBasicBlock::const_iterator I = CmpInstr, E = MI, - B = MI->getParent()->begin(); + MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin(); // Early exit if CmpInstr is at the beginning of the BB. 
if (I == B) return false; @@ -1957,7 +1984,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, bool isKill = UseMI->getOperand(OpIdx).isKill(); unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), - *UseMI, UseMI->getDebugLoc(), + UseMI, UseMI->getDebugLoc(), get(NewUseOpc), NewReg) .addReg(Reg1, getKillRegState(isKill)) .addImm(SOImmValV1))); @@ -2332,6 +2359,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return UseCycle; } +static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, + const MachineInstr *MI, unsigned Reg, + unsigned &DefIdx, unsigned &Dist) { + Dist = 0; + + MachineBasicBlock::const_iterator I = MI; ++I; + MachineBasicBlock::const_instr_iterator II = + llvm::prior(I.getInstrIterator()); + assert(II->isInsideBundle() && "Empty bundle?"); + + int Idx = -1; + while (II->isInsideBundle()) { + Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); + if (Idx != -1) + break; + --II; + ++Dist; + } + + assert(Idx != -1 && "Cannot find bundled definition!"); + DefIdx = Idx; + return II; +} + +static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, + const MachineInstr *MI, unsigned Reg, + unsigned &UseIdx, unsigned &Dist) { + Dist = 0; + + MachineBasicBlock::const_instr_iterator II = MI; ++II; + assert(II->isInsideBundle() && "Empty bundle?"); + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + + // FIXME: This doesn't properly handle multiple uses. + int Idx = -1; + while (II != E && II->isInsideBundle()) { + Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); + if (Idx != -1) + break; + if (II->getOpcode() != ARM::t2IT) + ++Dist; + ++II; + } + + if (Idx == -1) { + Dist = 0; + return 0; + } + + UseIdx = Idx; + return II; +} + int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, @@ -2340,35 +2420,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, DefMI->isRegSequence() || DefMI->isImplicitDef()) return 1; - const MCInstrDesc &DefMCID = DefMI->getDesc(); if (!ItinData || ItinData->isEmpty()) - return DefMCID.mayLoad() ? 3 : 1; + return DefMI->mayLoad() ? 3 : 1; - const MCInstrDesc &UseMCID = UseMI->getDesc(); + const MCInstrDesc *DefMCID = &DefMI->getDesc(); + const MCInstrDesc *UseMCID = &UseMI->getDesc(); const MachineOperand &DefMO = DefMI->getOperand(DefIdx); - if (DefMO.getReg() == ARM::CPSR) { + unsigned Reg = DefMO.getReg(); + if (Reg == ARM::CPSR) { if (DefMI->getOpcode() == ARM::FMSTAT) { // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) return Subtarget.isCortexA9() ? 1 : 20; } // CPSR set and branch can be paired in the same cycle. - if (UseMCID.isBranch()) + if (UseMI->isBranch()) return 0; + + // Otherwise it takes the instruction latency (generally one). + int Latency = getInstrLatency(ItinData, DefMI); + + // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to + // its uses. Instructions which are otherwise scheduled between them may + // incur a code size penalty (not able to use the CPSR setting 16-bit + // instructions). + if (Latency > 0 && Subtarget.isThumb2()) { + const MachineFunction *MF = DefMI->getParent()->getParent(); + if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + --Latency; + } + return Latency; } unsigned DefAlign = DefMI->hasOneMemOperand() ? 
(*DefMI->memoperands_begin())->getAlignment() : 0; unsigned UseAlign = UseMI->hasOneMemOperand() ? (*UseMI->memoperands_begin())->getAlignment() : 0; - int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, - UseMCID, UseIdx, UseAlign); + + unsigned DefAdj = 0; + if (DefMI->isBundle()) { + DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); + if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || + DefMI->isRegSequence() || DefMI->isImplicitDef()) + return 1; + DefMCID = &DefMI->getDesc(); + } + unsigned UseAdj = 0; + if (UseMI->isBundle()) { + unsigned NewUseIdx; + const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, + Reg, NewUseIdx, UseAdj); + if (NewUseMI) { + UseMI = NewUseMI; + UseIdx = NewUseIdx; + UseMCID = &UseMI->getDesc(); + } + } + + int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, + *UseMCID, UseIdx, UseAlign); + int Adj = DefAdj + UseAdj; + if (Adj) { + Latency -= (int)(DefAdj + UseAdj); + if (Latency < 1) + return 1; + } if (Latency > 1 && (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. - switch (DefMCID.getOpcode()) { + switch (DefMCID->getOpcode()) { default: break; case ARM::LDRrs: case ARM::LDRBrs: { @@ -2393,7 +2515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } if (DefAlign < 8 && Subtarget.isCortexA9()) - switch (DefMCID.getOpcode()) { + switch (DefMCID->getOpcode()) { default: break; case ARM::VLD1q8: case ARM::VLD1q16: @@ -2413,12 +2535,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2q8: case ARM::VLD2q16: case ARM::VLD2q32: - case ARM::VLD2d8_UPD: - case ARM::VLD2d16_UPD: - case ARM::VLD2d32_UPD: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: + case ARM::VLD2q8wb_register: + case ARM::VLD2q16wb_register: + case ARM::VLD2q32wb_register: case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: @@ -2446,9 +2574,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD1DUPq8: case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: - case ARM::VLD1DUPq8_UPD: - case ARM::VLD1DUPq16_UPD: - case ARM::VLD1DUPq32_UPD: + case ARM::VLD1DUPq8wb_fixed: + case ARM::VLD1DUPq16wb_fixed: + case ARM::VLD1DUPq32wb_fixed: + case ARM::VLD1DUPq8wb_register: + case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq32wb_register: case ARM::VLD2DUPd8: case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: @@ -2580,12 +2711,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8Pseudo_UPD: - case ARM::VLD2d16Pseudo_UPD: - case ARM::VLD2d32Pseudo_UPD: - case ARM::VLD2q8Pseudo_UPD: - case ARM::VLD2q16Pseudo_UPD: - case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD2d8PseudoWB_fixed: + case ARM::VLD2d16PseudoWB_fixed: + case ARM::VLD2d32PseudoWB_fixed: + case ARM::VLD2q8PseudoWB_fixed: + case ARM::VLD2q16PseudoWB_fixed: + case ARM::VLD2q32PseudoWB_fixed: + case ARM::VLD2d8PseudoWB_register: + case ARM::VLD2d16PseudoWB_register: + case ARM::VLD2d32PseudoWB_register: + case ARM::VLD2q8PseudoWB_register: + case 
ARM::VLD2q16PseudoWB_register: + case ARM::VLD2q32PseudoWB_register: case ARM::VLD3d8Pseudo: case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: @@ -2621,9 +2758,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD1DUPq8Pseudo: case ARM::VLD1DUPq16Pseudo: case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8Pseudo_UPD: - case ARM::VLD1DUPq16Pseudo_UPD: - case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD1DUPq8PseudoWB_fixed: + case ARM::VLD1DUPq16PseudoWB_fixed: + case ARM::VLD1DUPq32PseudoWB_fixed: + case ARM::VLD1DUPq8PseudoWB_register: + case ARM::VLD1DUPq16PseudoWB_register: + case ARM::VLD1DUPq32PseudoWB_register: case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: @@ -2671,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +unsigned +ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *DepMI) const { + unsigned Reg = DefMI->getOperand(DefIdx).getReg(); + if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI)) + return 1; + + // If the second MI is predicated, then there is an implicit use dependency. + return getOperandLatency(ItinData, DefMI, DefIdx, DepMI, + DepMI->getNumOperands()); +} + int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -2681,6 +2834,17 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, if (!ItinData || ItinData->isEmpty()) return 1; + if (MI->isBundle()) { + int Latency = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + if (I->getOpcode() != ARM::t2IT) + Latency += getInstrLatency(ItinData, I, PredCost); + } + return Latency; + } + const MCInstrDesc &MCID = MI->getDesc(); unsigned Class = MCID.getSchedClass(); unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 0f9f321..68e8208 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -69,10 +69,7 @@ public: bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; // Predication support. - bool isPredicated(const MachineInstr *MI) const { - int PIdx = MI->findFirstPredOperandIdx(); - return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; - } + bool isPredicated(const MachineInstr *MI) const; ARMCC::CondCodes getPredicate(const MachineInstr *MI) const { int PIdx = MI->findFirstPredOperandIdx(); @@ -213,12 +210,18 @@ public: SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const; + virtual unsigned getOutputLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *DepMI) const; + /// VFP/NEON execution domains. 
std::pair<uint16_t, uint16_t> getExecutionDomain(const MachineInstr *MI) const; void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; private: + unsigned getInstBundleLength(const MachineInstr *MI) const; + int getVLDMDefCycle(const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, unsigned DefClass, diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 7c42342..8ee6ce2 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -631,7 +631,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled. - return (RealignStack && !AFI->isThumb1OnlyFunction() && + return (MF.getTarget().Options.RealignStack && !AFI->isThumb1OnlyFunction() && (!MFI->hasVarSizedObjects() || EnableBasePointer)); } @@ -649,7 +649,7 @@ needsStackRealignment(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (DisableFramePointerElim(MF) && MFI->adjustsStack()) + if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index d74ccfa..365f0bb 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -401,7 +401,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) emitInstruction(*I); } diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 3e3a413..2039d41 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -26,6 +26,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -51,6 +52,43 @@ static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); +static cl::opt<bool> +AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true), + cl::desc("Align constant islands in code")); + +/// UnknownPadding - Return the worst case padding that could result from +/// unknown offset bits. This does not include alignment padding caused by +/// known offset bits. +/// +/// @param LogAlign log2(alignment) +/// @param KnownBits Number of known low offset bits. +static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { + if (KnownBits < LogAlign) + return (1u << LogAlign) - (1u << KnownBits); + return 0; +} + +/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact, +/// add padding such that: +/// +/// 1. The result is aligned to 1 << LogAlign. +/// +/// 2. No other value of the unknown bits would require more padding. 
+/// +/// This may add more padding than is required to satisfy just one of the +/// constraints. It is necessary to compute alignment this way to guarantee +/// that we don't underestimate the padding before an aligned block. If the +/// real padding before a block is larger than we think, constant pool entries +/// may go out of range. +static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign, + unsigned KnownBits) { + // Add the worst possible padding that the unknown bits could cause. + Offset += UnknownPadding(LogAlign, KnownBits); + + // Then align the result. + return RoundUpToAlignment(Offset, 1u << LogAlign); +} + namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM /// requires constant pool entries to be scattered among the instructions @@ -64,16 +102,70 @@ namespace { /// CPE - A constant pool entry that has been placed somewhere, which /// tracks a list of users. class ARMConstantIslands : public MachineFunctionPass { - /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed - /// by MBB Number. The two-byte pads required for Thumb alignment are - /// counted as part of the following block (i.e., the offset and size for - /// a padded block will both be ==2 mod 4). - std::vector<unsigned> BBSizes; + /// BasicBlockInfo - Information about the offset and size of a single + /// basic block. + struct BasicBlockInfo { + /// Offset - Distance from the beginning of the function to the beginning + /// of this basic block. + /// + /// The offset is always aligned as required by the basic block. + unsigned Offset; + + /// Size - Size of the basic block in bytes. If the block contains + /// inline assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + /// KnownBits - The number of low bits in Offset that are known to be + /// exact. The remaining bits of Offset are an upper bound. + uint8_t KnownBits; + + /// Unalign - When non-zero, the block contains instructions (inline asm) + /// of unknown size. The real size may be smaller than Size bytes by a + /// multiple of 1 << Unalign. + uint8_t Unalign; + + /// PostAlign - When non-zero, the block terminator contains a .align + /// directive, so the end of the block is aligned to 1 << PostAlign + /// bytes. + uint8_t PostAlign; + + BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0), + PostAlign(0) {} + + /// Compute the number of known offset bits internally to this block. + /// This number should be used to predict worst case padding when + /// splitting the block. + unsigned internalKnownBits() const { + return Unalign ? Unalign : KnownBits; + } + + /// Compute the offset immediately following this block. If LogAlign is + /// specified, return the offset the successor block will get if it has + /// this alignment. + unsigned postOffset(unsigned LogAlign = 0) const { + unsigned PO = Offset + Size; + unsigned LA = std::max(unsigned(PostAlign), LogAlign); + if (!LA) + return PO; + // Add alignment padding from the terminator. + return WorstCaseAlign(PO, LA, internalKnownBits()); + } + + /// Compute the number of known low bits of postOffset. If this block + /// contains inline asm, the number of known bits drops to the + /// instruction alignment. An aligned terminator may increase the number + /// of know bits. + /// If LogAlign is given, also consider the alignment of the next block. 
+ unsigned postKnownBits(unsigned LogAlign = 0) const { + return std::max(std::max(unsigned(PostAlign), LogAlign), + internalKnownBits()); + } + }; - /// BBOffsets - the offset of each MBB in bytes, starting from 0. - /// The two-byte pads required for Thumb alignment are counted as part of - /// the following block. - std::vector<unsigned> BBOffsets; + std::vector<BasicBlockInfo> BBInfo; /// WaterList - A sorted list of basic blocks where islands could be placed /// (i.e. blocks that don't fall through to the following block, due @@ -162,9 +254,8 @@ namespace { /// the branch fix up pass. bool HasFarJump; - /// HasInlineAsm - True if the function contains inline assembly. - bool HasInlineAsm; - + MachineFunction *MF; + MachineConstantPool *MCP; const ARMInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; @@ -182,67 +273,65 @@ namespace { } private: - void DoInitialPlacement(MachineFunction &MF, - std::vector<MachineInstr*> &CPEMIs); + void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - void JumpTableFunctionScan(MachineFunction &MF); - void InitialFunctionScan(MachineFunction &MF, - const std::vector<MachineInstr*> &CPEMIs); + unsigned getCPELogAlign(const MachineInstr *CPEMI); + void JumpTableFunctionScan(); + void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs); MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); - void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta); + void AdjustBBOffsetsAfter(MachineBasicBlock *BB); bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); - bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex); + bool HandleConstantPoolUser(unsigned CPUserIndex); void RemoveDeadCPEMI(MachineInstr *CPEMI); bool RemoveUnusedCPEntries(); bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned Disp, bool NegOk, bool DoDump = false); bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, - CPUser &U); - bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, - unsigned Disp, bool NegativeOK, bool IsSoImm = false); + CPUser &U, unsigned &Growth); bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br); - bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br); - bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br); + bool FixUpImmediateBr(ImmBranch &Br); + bool FixUpConditionalBr(ImmBranch &Br); + bool FixUpUnconditionalBr(ImmBranch &Br); bool UndoLRSpillRestore(); - bool OptimizeThumb2Instructions(MachineFunction &MF); - bool OptimizeThumb2Branches(MachineFunction &MF); - bool ReorderThumb2JumpTables(MachineFunction &MF); - bool OptimizeThumb2JumpTables(MachineFunction &MF); + bool OptimizeThumb2Instructions(); + bool OptimizeThumb2Branches(); + bool ReorderThumb2JumpTables(); + bool OptimizeThumb2JumpTables(); MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); + void ComputeBlockSize(MachineBasicBlock *MBB); unsigned GetOffsetOf(MachineInstr *MI) const; void dumpBBs(); - void verify(MachineFunction &MF); + void verify(); + + bool OffsetIsInRange(unsigned UserOffset, 
unsigned TrialOffset, + unsigned Disp, bool NegativeOK, bool IsSoImm = false); + bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + const CPUser &U) { + return OffsetIsInRange(UserOffset, TrialOffset, + U.MaxDisp, U.NegOk, U.IsSoImm); + } }; char ARMConstantIslands::ID = 0; } /// verify - check BBOffsets, BBSizes, alignment of islands -void ARMConstantIslands::verify(MachineFunction &MF) { - assert(BBOffsets.size() == BBSizes.size()); - for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i) - assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]); - if (!isThumb) - return; +void ARMConstantIslands::verify() { #ifndef NDEBUG - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; - if (!MBB->empty() && - MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { - unsigned MBBId = MBB->getNumber(); - assert(HasInlineAsm || - (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) || - (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0)); - } + unsigned Align = MBB->getAlignment(); + unsigned MBBId = MBB->getNumber(); + assert(BBInfo[MBBId].Offset % (1u << Align) == 0); + assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; @@ -257,10 +346,16 @@ void ARMConstantIslands::verify(MachineFunction &MF) { /// print block size and offset information - debugging void ARMConstantIslands::dumpBBs() { - for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) { - DEBUG(errs() << "block " << J << " offset " << BBOffsets[J] - << " size " << BBSizes[J] << "\n"); - } + DEBUG({ + for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { + const BasicBlockInfo &BBI = BBInfo[J]; + dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + << " kb=" << unsigned(BBI.KnownBits) + << " ua=" << unsigned(BBI.Unalign) + << " pa=" << unsigned(BBI.PostAlign) + << format(" size=%#x\n", BBInfo[J].Size); + } + }); } /// createARMConstantIslandPass - returns an instance of the constpool @@ -269,34 +364,38 @@ FunctionPass *llvm::createARMConstantIslandPass() { return new ARMConstantIslands(); } -bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { - MachineConstantPool &MCP = *MF.getConstantPool(); +bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + MCP = mf.getConstantPool(); - TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo(); - AFI = MF.getInfo<ARMFunctionInfo>(); - STI = &MF.getTarget().getSubtarget<ARMSubtarget>(); + DEBUG(dbgs() << "***** ARMConstantIslands: " + << MCP->getConstants().size() << " CP entries, aligned to " + << MCP->getConstantPoolAlignment() << " bytes *****\n"); + + TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo(); + AFI = MF->getInfo<ARMFunctionInfo>(); + STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); isThumb2 = AFI->isThumb2Function(); HasFarJump = false; - HasInlineAsm = false; // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. - MF.RenumberBlocks(); + MF->RenumberBlocks(); // Try to reorder and otherwise adjust the block layout to make good use // of the TB[BH] instructions. 
bool MadeChange = false; if (isThumb2 && AdjustJumpTableBlocks) { - JumpTableFunctionScan(MF); - MadeChange |= ReorderThumb2JumpTables(MF); + JumpTableFunctionScan(); + MadeChange |= ReorderThumb2JumpTables(); // Data is out of date, so clear it. It'll be re-computed later. T2JumpTables.clear(); // Blocks may have shifted around. Keep the numbering up to date. - MF.RenumberBlocks(); + MF->RenumberBlocks(); } // Thumb1 functions containing constant pools get 4-byte alignment. @@ -304,16 +403,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // ARM and Thumb2 functions need to be 4-byte aligned. if (!isThumb1) - MF.EnsureAlignment(2); // 2 = log2(4) + MF->EnsureAlignment(2); // 2 = log2(4) // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; - if (!MCP.isEmpty()) { - DoInitialPlacement(MF, CPEMIs); - if (isThumb1) - MF.EnsureAlignment(2); // 2 = log2(4) - } + if (!MCP->isEmpty()) + DoInitialPlacement(CPEMIs); /// The next UID to take is the first unused one. AFI->initPICLabelUId(CPEMIs.size()); @@ -321,7 +417,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Do the initial scan of the function, building up information about the // sizes of each block, the location of all the water, and finding all of the // constant pool users. - InitialFunctionScan(MF, CPEMIs); + InitialFunctionScan(CPEMIs); CPEMIs.clear(); DEBUG(dumpBBs()); @@ -333,9 +429,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // is no change. unsigned NoCPIters = 0, NoBRIters = 0; while (true) { + DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); bool CPChange = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - CPChange |= HandleConstantPoolUser(MF, i); + CPChange |= HandleConstantPoolUser(i); if (CPChange && ++NoCPIters > 30) llvm_unreachable("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); @@ -344,9 +441,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // appear as "new water" for the next iteration of constant pool placement. NewWaterList.clear(); + DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n'); bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - BRChange |= FixUpImmediateBr(MF, ImmBranches[i]); + BRChange |= FixUpImmediateBr(ImmBranches[i]); if (BRChange && ++NoBRIters > 30) llvm_unreachable("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); @@ -358,10 +456,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Shrink 32-bit Thumb2 branch, load, and store instructions. if (isThumb2 && !STI->prefers32BitThumb()) - MadeChange |= OptimizeThumb2Instructions(MF); + MadeChange |= OptimizeThumb2Instructions(); // After a while, this might be made debug-only, but it is not expensive. - verify(MF); + verify(); // If LR has been forced spilled and no far jump (i.e. BL) has been issued, // undo the spill / restore of LR if possible. @@ -376,10 +474,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { } } - DEBUG(errs() << '\n'; dumpBBs()); + DEBUG(dbgs() << '\n'; dumpBBs()); - BBSizes.clear(); - BBOffsets.clear(); + BBInfo.clear(); WaterList.clear(); CPUsers.clear(); CPEntries.clear(); @@ -392,37 +489,65 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { /// DoInitialPlacement - Perform the initial placement of the constant pool /// entries. 
To start with, we put them all at the end of the function. -void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, - std::vector<MachineInstr*> &CPEMIs) { +void +ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // Create the basic block to hold the CPE's. - MachineBasicBlock *BB = MF.CreateMachineBasicBlock(); - MF.push_back(BB); + MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); + MF->push_back(BB); + + // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). + unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + + // Mark the basic block as required by the const-pool. + // If AlignConstantIslands isn't set, use 4-byte alignment for everything. + BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); + + // The function needs to be as aligned as the basic blocks. The linker may + // move functions around based on their alignment. + MF->EnsureAlignment(BB->getAlignment()); + + // Order the entries in BB by descending alignment. That ensures correct + // alignment of all entries as long as BB is sufficiently aligned. Keep + // track of the insertion point for each alignment. We are going to bucket + // sort the entries as they are created. + SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end()); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. - const std::vector<MachineConstantPoolEntry> &CPs = - MF.getConstantPool()->getConstants(); + const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const TargetData &TD = *MF.getTarget().getTargetData(); + const TargetData &TD = *MF->getTarget().getTargetData(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); - // Verify that all constant pool entries are a multiple of 4 bytes. If not, - // we would have to pad them out or something so that instructions stay - // aligned. - assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!"); + assert(Size >= 4 && "Too small constant pool entry"); + unsigned Align = CPs[i].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid alignment"); + // Verify that all constant pool entries are a multiple of their alignment. + // If not, we would have to pad them out so that instructions stay aligned. + assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!"); + + // Insert CONSTPOOL_ENTRY before entries with a smaller alignment. + unsigned LogAlign = Log2_32(Align); + MachineBasicBlock::iterator InsAt = InsPoint[LogAlign]; MachineInstr *CPEMI = - BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) + BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) .addImm(i).addConstantPoolIndex(i).addImm(Size); CPEMIs.push_back(CPEMI); + // Ensure that future entries with higher alignment get inserted before + // CPEMI. This is bucket sort with iterators. + for (unsigned a = LogAlign + 1; a < MaxAlign; ++a) + if (InsPoint[a] == InsAt) + InsPoint[a] = CPEMI; + // Add a new CPEntry, but no corresponding CPUser yet. 
std::vector<CPEntry> CPEs; CPEs.push_back(CPEntry(CPEMI, i)); CPEntries.push_back(CPEs); ++NumCPEs; - DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i - << "\n"); + DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n"); } + DEBUG(BB->dump()); } /// BBHasFallthrough - Return true if the specified basic block can fallthrough @@ -458,17 +583,33 @@ ARMConstantIslands::CPEntry return NULL; } +/// getCPELogAlign - Returns the required alignment of the constant pool entry +/// represented by CPEMI. Alignment is measured in log2(bytes) units. +unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { + assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY); + + // Everything is 4-byte aligned unless AlignConstantIslands is set. + if (!AlignConstantIslands) + return 2; + + unsigned CPI = CPEMI->getOperand(1).getIndex(); + assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); + unsigned Align = MCP->getConstants()[CPI].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); + return Log2_32(Align); +} + /// JumpTableFunctionScan - Do a scan of the function, building up /// information about the sizes of each block and the locations of all /// the jump tables. -void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) { - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); +void ARMConstantIslands::JumpTableFunctionScan() { + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) - if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT) + if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT) T2JumpTables.push_back(I); } } @@ -476,23 +617,27 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) { /// InitialFunctionScan - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. -void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, - const std::vector<MachineInstr*> &CPEMIs) { - // First thing, see if the function has any inline assembly in it. If so, - // we have to be conservative about alignment assumptions, as we don't - // know for sure the size of any instructions in the inline assembly. - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) - if (I->getOpcode() == ARM::INLINEASM) - HasInlineAsm = true; - } +void ARMConstantIslands:: +InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { + BBInfo.clear(); + BBInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) + ComputeBlockSize(I); + + // The known bits of the entry block offset are determined by the function + // alignment. + BBInfo.front().KnownBits = MF->getAlignment(); + + // Compute block offsets and known bits. + AdjustBBOffsetsAfter(MF->begin()); // Now go back through the instructions and build up our data structures. 
- unsigned Offset = 0; - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; @@ -501,16 +646,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, if (!BBHasFallthrough(&MBB)) WaterList.push_back(&MBB); - unsigned MBBSize = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { if (I->isDebugValue()) continue; - // Add instruction size to MBBSize. - MBBSize += TII->GetInstSizeInBytes(I); int Opc = I->getOpcode(); - if (I->getDesc().isBranch()) { + if (I->isBranch()) { bool isCond = false; unsigned Bits = 0; unsigned Scale = 1; @@ -518,18 +660,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, switch (Opc) { default: continue; // Ignore other JT branches - case ARM::tBR_JTr: - // A Thumb1 table jump may involve padding; for the offsets to - // be right, functions containing these must be 4-byte aligned. - // tBR_JTr expands to a mov pc followed by .align 2 and then the jump - // table entries. So this code checks whether offset of tBR_JTr + 2 - // is aligned. That is held in Offset+MBBSize, which already has - // 2 added in for the size of the mov pc instruction. - MF.EnsureAlignment(2U); - if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) - // FIXME: Add a pseudo ALIGN instruction instead. - MBBSize += 2; // padding - continue; // Does not get an entry in ImmBranches case ARM::t2BR_JT: T2JumpTables.push_back(I); continue; // Does not get an entry in ImmBranches @@ -647,18 +777,30 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, break; } } + } +} - // In thumb mode, if this block is a constpool island, we may need padding - // so it's aligned on 4 byte boundary. - if (isThumb && - !MBB.empty() && - MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY && - ((Offset%4) != 0 || HasInlineAsm)) - MBBSize += 2; - - BBSizes.push_back(MBBSize); - BBOffsets.push_back(Offset); - Offset += MBBSize; +/// ComputeBlockSize - Compute the size and some alignment information for MBB. +/// This function updates BBInfo directly. +void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) { + BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; + BBI.Size = 0; + BBI.Unalign = 0; + BBI.PostAlign = 0; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + BBI.Size += TII->GetInstSizeInBytes(I); + // For inline asm, GetInstSizeInBytes returns a conservative estimate. + // The actual size may be smaller, but still a multiple of the instr size. + if (I->isInlineAsm()) + BBI.Unalign = isThumb ? 1 : 2; + } + + // tBR_JTr contains a .align 2 directive. + if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) { + BBI.PostAlign = 2; + MBB->getParent()->EnsureAlignment(2); } } @@ -671,14 +813,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { // The offset is composed of two things: the sum of the sizes of all MBB's // before this instruction's block, and the offset from the start of the block // it is in. - unsigned Offset = BBOffsets[MBB->getNumber()]; - - // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has - // alignment padding, and compensate if so. - if (isThumb && - MI->getOpcode() == ARM::CONSTPOOL_ENTRY && - (Offset%4 != 0 || HasInlineAsm)) - Offset += 2; + unsigned Offset = BBInfo[MBB->getNumber()].Offset; // Sum instructions before MI in MBB. 
for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) { @@ -702,12 +837,9 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { // Renumber the MBB's to keep them consecutive. NewBB->getParent()->RenumberBlocks(NewBB); - // Insert a size into BBSizes to align it properly with the (newly + // Insert an entry into BBInfo to align it properly with the (newly // renumbered) block numbers. - BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); - - // Likewise for BBOffsets. - BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); // Next, update WaterList. Specifically, we need to add NewMBB as having // available water after it. @@ -723,13 +855,12 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { /// account for this change and returns the newly created block. MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); - MachineFunction &MF = *OrigBB->getParent(); // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = - MF.CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); MachineFunction::iterator MBBI = OrigBB; ++MBBI; - MF.insert(MBBI, NewBB); + MF->insert(MBBI, NewBB); // Splice the instructions starting with MI over to NewBB. NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); @@ -747,16 +878,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. - while (!OrigBB->succ_empty()) { - MachineBasicBlock *Succ = *OrigBB->succ_begin(); - OrigBB->removeSuccessor(Succ); - NewBB->addSuccessor(Succ); - - // This pass should be run after register allocation, so there should be no - // PHI nodes to update. - assert((Succ->empty() || !Succ->begin()->isPHI()) - && "PHI nodes should be eliminated by now!"); - } + NewBB->transferSuccessors(OrigBB); // OrigBB branches to NewBB. OrigBB->addSuccessor(NewBB); @@ -764,14 +886,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // Update internal data structures to account for the newly inserted MBB. // This is almost the same as UpdateForInsertedWaterBlock, except that // the Water goes after OrigBB, not NewBB. - MF.RenumberBlocks(NewBB); + MF->RenumberBlocks(NewBB); - // Insert a size into BBSizes to align it properly with the (newly + // Insert an entry into BBInfo to align it properly with the (newly // renumbered) block numbers. - BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); - - // Likewise for BBOffsets. - BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); // Next, update WaterList. Specifically, we need to add OrigMBB as having // available water after it (but not if it's already there, which happens @@ -787,54 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { WaterList.insert(IP, OrigBB); NewWaterList.insert(OrigBB); - unsigned OrigBBI = OrigBB->getNumber(); - unsigned NewBBI = NewBB->getNumber(); - - int delta = isThumb1 ? 2 : 4; - // Figure out how large the OrigBB is. As the first half of the original // block, it cannot contain a tablejump. The size includes // the new jump we added. 
(It should be possible to do this without // recounting everything, but it's very confusing, and this is rarely // executed.) - unsigned OrigBBSize = 0; - for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end(); - I != E; ++I) - OrigBBSize += TII->GetInstSizeInBytes(I); - BBSizes[OrigBBI] = OrigBBSize; - - // ...and adjust BBOffsets for NewBB accordingly. - BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI]; + ComputeBlockSize(OrigBB); // Figure out how large the NewMBB is. As the second half of the original // block, it may contain a tablejump. - unsigned NewBBSize = 0; - for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); - I != E; ++I) - NewBBSize += TII->GetInstSizeInBytes(I); - // Set the size of NewBB in BBSizes. It does not include any padding now. - BBSizes[NewBBI] = NewBBSize; - - MachineInstr* ThumbJTMI = prior(NewBB->end()); - if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { - // We've added another 2-byte instruction before this tablejump, which - // means we will always need padding if we didn't before, and vice versa. - - // The original offset of the jump instruction was: - unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta; - if (OrigOffset%4 == 0) { - // We had padding before and now we don't. No net change in code size. - delta = 0; - } else { - // We didn't have padding before and now we do. - BBSizes[NewBBI] += 2; - delta = 4; - } - } + ComputeBlockSize(NewBB); // All BBOffsets following these blocks must be modified. - if (delta) - AdjustBBOffsetsAfter(NewBB, delta); + AdjustBBOffsetsAfter(OrigBB); return NewBB; } @@ -882,19 +966,44 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, /// WaterIsInRange - Returns true if a CPE placed after the specified /// Water (a basic block) will be in range for the specific MI. - +/// +/// Compute how much the function will grow by inserting a CPE after Water. bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, - MachineBasicBlock* Water, CPUser &U) { - unsigned MaxDisp = U.MaxDisp; - unsigned CPEOffset = BBOffsets[Water->getNumber()] + - BBSizes[Water->getNumber()]; - - // If the CPE is to be inserted before the instruction, that will raise - // the offset of the instruction. - if (CPEOffset < UserOffset) - UserOffset += U.CPEMI->getOperand(2).getImm(); - - return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm); + MachineBasicBlock* Water, CPUser &U, + unsigned &Growth) { + unsigned CPELogAlign = getCPELogAlign(U.CPEMI); + unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); + unsigned NextBlockOffset, NextBlockAlignment; + MachineFunction::const_iterator NextBlock = Water; + if (++NextBlock == MF->end()) { + NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); + NextBlockAlignment = 0; + } else { + NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; + NextBlockAlignment = NextBlock->getAlignment(); + } + unsigned Size = U.CPEMI->getOperand(2).getImm(); + unsigned CPEEnd = CPEOffset + Size; + + // The CPE may be able to hide in the alignment padding before the next + // block. It may also cause more padding to be required if it is more aligned + // that the next block. + if (CPEEnd > NextBlockOffset) { + Growth = CPEEnd - NextBlockOffset; + // Compute the padding that would go at the end of the CPE to align the next + // block. + Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); + + // If the CPE is to be inserted before the instruction, that will raise + // the offset of the instruction. 
Also account for unknown alignment padding + // in blocks between CPE and the user. + if (CPEOffset < UserOffset) + UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); + } else + // CPE fits in existing padding. + Growth = 0; + + return OffsetIsInRange(UserOffset, CPEOffset, U); } /// CPEIsInRange - Returns true if the distance between specific MI and @@ -903,14 +1012,20 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned MaxDisp, bool NegOk, bool DoDump) { unsigned CPEOffset = GetOffsetOf(CPEMI); - assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE"); + assert(CPEOffset % 4 == 0 && "Misaligned CPE"); if (DoDump) { - DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm() - << " max delta=" << MaxDisp - << " insn address=" << UserOffset - << " CPE address=" << CPEOffset - << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI); + DEBUG({ + unsigned Block = MI->getParent()->getNumber(); + const BasicBlockInfo &BBI = BBInfo[Block]; + dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm() + << " max delta=" << MaxDisp + << format(" insn address=%#x", UserOffset) + << " in BB#" << Block << ": " + << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI + << format("CPE address=%#x offset=%+d: ", CPEOffset, + int(CPEOffset-UserOffset)); + }); } return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk); @@ -933,55 +1048,17 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { } #endif // NDEBUG -void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, - int delta) { - MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI); - for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs(); - i < e; ++i) { - BBOffsets[i] += delta; - // If some existing blocks have padding, adjust the padding as needed, a - // bit tricky. delta can be negative so don't use % on that. - if (!isThumb) - continue; - MachineBasicBlock *MBB = MBBI; - if (!MBB->empty() && !HasInlineAsm) { - // Constant pool entries require padding. - if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { - unsigned OldOffset = BBOffsets[i] - delta; - if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) { - // add new padding - BBSizes[i] += 2; - delta += 2; - } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) { - // remove existing padding - BBSizes[i] -= 2; - delta -= 2; - } - } - // Thumb1 jump tables require padding. They should be at the end; - // following unconditional branches are removed by AnalyzeBranch. - // tBR_JTr expands to a mov pc followed by .align 2 and then the jump - // table entries. So this code checks whether offset of tBR_JTr - // is aligned; if it is, the offset of the jump table following the - // instruction will not be aligned, and we need padding. - MachineInstr *ThumbJTMI = prior(MBB->end()); - if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { - unsigned NewMIOffset = GetOffsetOf(ThumbJTMI); - unsigned OldMIOffset = NewMIOffset - delta; - if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) { - // remove existing padding - BBSizes[i] -= 2; - delta -= 2; - } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) { - // add new padding - BBSizes[i] += 2; - delta += 2; - } - } - if (delta==0) - return; - } - MBBI = llvm::next(MBBI); +void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { + for(unsigned i = BB->getNumber() + 1, e = MF->getNumBlockIDs(); i < e; ++i) { + // Get the offset and known bits at the end of the layout predecessor. 
+ // Include the alignment of the current block. + unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); + unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); + unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + + // This is where block i begins. + BBInfo[i].Offset = Offset; + BBInfo[i].KnownBits = KnownBits; } } @@ -1016,7 +1093,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Check to see if the CPE is already in-range. if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) { - DEBUG(errs() << "In range\n"); + DEBUG(dbgs() << "In range\n"); return 1; } @@ -1031,7 +1108,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) if (CPEs[i].CPEMI == NULL) continue; if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) { - DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#" + DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); // Point the CPUser node to the replacement U.CPEMI = CPEs[i].CPEMI; @@ -1079,10 +1156,9 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, if (WaterList.empty()) return false; - bool FoundWaterThatWouldPad = false; - water_iterator IPThatWouldPad; - for (water_iterator IP = prior(WaterList.end()), - B = WaterList.begin();; --IP) { + unsigned BestGrowth = ~0u; + for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; + --IP) { MachineBasicBlock* WaterBB = *IP; // Check if water is in range and is either at a lower address than the // current "high water mark" or a new water block that was created since @@ -1092,31 +1168,24 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, // should be relatively uncommon and when it does happen, we want to be // sure to take advantage of it for all the CPEs near that block, so that // we don't insert more branches than necessary. - if (WaterIsInRange(UserOffset, WaterBB, U) && + unsigned Growth; + if (WaterIsInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || - NewWaterList.count(WaterBB))) { - unsigned WBBId = WaterBB->getNumber(); - if (isThumb && - (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) { - // This is valid Water, but would introduce padding. Remember - // it in case we don't find any Water that doesn't do this. - if (!FoundWaterThatWouldPad) { - FoundWaterThatWouldPad = true; - IPThatWouldPad = IP; - } - } else { - WaterIter = IP; + NewWaterList.count(WaterBB)) && Growth < BestGrowth) { + // This is the least amount of required padding seen so far. + BestGrowth = Growth; + WaterIter = IP; + DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() + << " Growth=" << Growth << '\n'); + + // Keep looking unless it is perfect. 
+ if (BestGrowth == 0) return true; - } } if (IP == B) break; } - if (FoundWaterThatWouldPad) { - WaterIter = IPThatWouldPad; - return true; - } - return false; + return BestGrowth != ~0u; } /// CreateNewWater - No existing WaterList entry will work for @@ -1132,114 +1201,143 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; + unsigned CPELogAlign = getCPELogAlign(CPEMI); MachineBasicBlock *UserMBB = UserMI->getParent(); - unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] + - BBSizes[UserMBB->getNumber()]; - assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]); + const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; // If the block does not end in an unconditional branch already, and if the // end of the block is within range, make new water there. (The addition // below is for the unconditional branch we will be adding: 4 bytes on ARM + // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for // inside OffsetIsInRange. - if (BBHasFallthrough(UserMBB) && - OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4), - U.MaxDisp, U.NegOk, U.IsSoImm)) { - DEBUG(errs() << "Split at end of block\n"); - if (&UserMBB->back() == UserMI) - assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!"); - NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); - // Add an unconditional branch from UserMBB to fallthrough block. - // Record it for branch lengthening; this new branch will not get out of - // range, but if the preceding conditional branch is out of range, the - // targets will be exchanged, and the altered branch may be out of - // range, so the machinery has to know about it. - int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; - if (!isThumb) - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); - else - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) - .addImm(ARMCC::AL).addReg(0); - unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); - ImmBranches.push_back(ImmBranch(&UserMBB->back(), - MaxDisp, false, UncondBr)); - int delta = isThumb1 ? 2 : 4; - BBSizes[UserMBB->getNumber()] += delta; - AdjustBBOffsetsAfter(UserMBB, delta); - } else { - // What a big block. Find a place within the block to split it. - // This is a little tricky on Thumb1 since instructions are 2 bytes - // and constant pool entries are 4 bytes: if instruction I references - // island CPE, and instruction I+1 references CPE', it will - // not work well to put CPE as far forward as possible, since then - // CPE' cannot immediately follow it (that location is 2 bytes - // farther away from I+1 than CPE was from I) and we'd need to create - // a new island. So, we make a first guess, then walk through the - // instructions between the one currently being looked at and the - // possible insertion point, and make sure any other instructions - // that reference CPEs will be able to use the same island area; - // if not, we back up the insertion point. - - // The 4 in the following is for the unconditional branch we'll be - // inserting (allows for long branch on Thumb1). Alignment of the - // island is handled inside OffsetIsInRange. - unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4; - // This could point off the end of the block if we've already got - // constant pool entries following this block; only the last one is - // in the water list. 
Back past any possible branches (allow for a - // conditional and a maximally long unconditional). - if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1]) - BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] - - (isThumb1 ? 6 : 8); - unsigned EndInsertOffset = BaseInsertOffset + - CPEMI->getOperand(2).getImm(); - MachineBasicBlock::iterator MI = UserMI; - ++MI; - unsigned CPUIndex = CPUserIndex+1; - unsigned NumCPUsers = CPUsers.size(); - MachineInstr *LastIT = 0; - for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); - Offset < BaseInsertOffset; - Offset += TII->GetInstSizeInBytes(MI), - MI = llvm::next(MI)) { - if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { - CPUser &U = CPUsers[CPUIndex]; - if (!OffsetIsInRange(Offset, EndInsertOffset, - U.MaxDisp, U.NegOk, U.IsSoImm)) { - BaseInsertOffset -= (isThumb1 ? 2 : 4); - EndInsertOffset -= (isThumb1 ? 2 : 4); - } - // This is overly conservative, as we don't account for CPEMIs - // being reused within the block, but it doesn't matter much. - EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm(); - CPUIndex++; - } + if (BBHasFallthrough(UserMBB)) { + // Size of branch to insert. + unsigned Delta = isThumb1 ? 2 : 4; + // End of UserBlock after adding a branch. + unsigned UserBlockEnd = UserBBI.postOffset() + Delta; + // Compute the offset where the CPE will begin. + unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign, + UserBBI.postKnownBits()); + + if (OffsetIsInRange(UserOffset, CPEOffset, U)) { + DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() + << format(", expected CPE offset %#x\n", CPEOffset)); + NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); + // Add an unconditional branch from UserMBB to fallthrough block. Record + // it for branch lengthening; this new branch will not get out of range, + // but if the preceding conditional branch is out of range, the targets + // will be exchanged, and the altered branch may be out of range, so the + // machinery has to know about it. + int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; + if (!isThumb) + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); + else + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) + .addImm(ARMCC::AL).addReg(0); + unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); + ImmBranches.push_back(ImmBranch(&UserMBB->back(), + MaxDisp, false, UncondBr)); + BBInfo[UserMBB->getNumber()].Size += Delta; + AdjustBBOffsetsAfter(UserMBB); + return; + } + } - // Remember the last IT instruction. - if (MI->getOpcode() == ARM::t2IT) - LastIT = MI; + // What a big block. Find a place within the block to split it. This is a + // little tricky on Thumb1 since instructions are 2 bytes and constant pool + // entries are 4 bytes: if instruction I references island CPE, and + // instruction I+1 references CPE', it will not work well to put CPE as far + // forward as possible, since then CPE' cannot immediately follow it (that + // location is 2 bytes farther away from I+1 than CPE was from I) and we'd + // need to create a new island. So, we make a first guess, then walk through + // the instructions between the one currently being looked at and the + // possible insertion point, and make sure any other instructions that + // reference CPEs will be able to use the same island area; if not, we back + // up the insertion point. + + // Try to split the block so it's fully aligned. 
Compute the latest split + point where we can add a 4-byte branch instruction, and then + // WorstCaseAlign to LogAlign. + unsigned LogAlign = MF->getAlignment(); + assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + unsigned KnownBits = UserBBI.internalKnownBits(); + unsigned UPad = UnknownPadding(LogAlign, KnownBits); + unsigned BaseInsertOffset = UserOffset + U.MaxDisp; + DEBUG(dbgs() << format("Split in middle of big block before %#x", + BaseInsertOffset)); + + // Account for alignment and unknown padding. + BaseInsertOffset &= ~((1u << LogAlign) - 1); + BaseInsertOffset -= UPad; + + // The 4 in the following is for the unconditional branch we'll be inserting + // (allows for long branch on Thumb1). Alignment of the island is handled + // inside OffsetIsInRange. + BaseInsertOffset -= 4; + + DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) + << " la=" << LogAlign + << " kb=" << KnownBits + << " up=" << UPad << '\n'); + + // This could point off the end of the block if we've already got constant + // pool entries following this block; only the last one is in the water list. + // Back past any possible branches (allow for a conditional and a maximally + // long unconditional). + if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset) + BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset - + (isThumb1 ? 6 : 8); + unsigned EndInsertOffset = + WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) + + CPEMI->getOperand(2).getImm(); + MachineBasicBlock::iterator MI = UserMI; + ++MI; + unsigned CPUIndex = CPUserIndex+1; + unsigned NumCPUsers = CPUsers.size(); + MachineInstr *LastIT = 0; + for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); + Offset < BaseInsertOffset; + Offset += TII->GetInstSizeInBytes(MI), + MI = llvm::next(MI)) { + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { + CPUser &U = CPUsers[CPUIndex]; + if (!OffsetIsInRange(Offset, EndInsertOffset, U)) { + // Shift insertion point by one unit of alignment so it is within reach. + BaseInsertOffset -= 1u << LogAlign; + EndInsertOffset -= 1u << LogAlign; + } + // This is overly conservative, as we don't account for CPEMIs being + // reused within the block, but it doesn't matter much. Also assume CPEs + // are added in order with alignment padding. We may eventually be able + // to pack the aligned CPEs better. + EndInsertOffset = RoundUpToAlignment(EndInsertOffset, + 1u << getCPELogAlign(U.CPEMI)) + + U.CPEMI->getOperand(2).getImm(); + CPUIndex++; } - DEBUG(errs() << "Split in middle of big block\n"); - --MI; + // Remember the last IT instruction. + if (MI->getOpcode() == ARM::t2IT) + LastIT = MI; + } - // Avoid splitting an IT block. - if (LastIT) { - unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); - if (CC != ARMCC::AL) - MI = LastIT; - } - NewMBB = SplitBlockBeforeInstr(MI); + --MI; + + // Avoid splitting an IT block. + if (LastIT) { + unsigned PredReg = 0; + ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + if (CC != ARMCC::AL) + MI = LastIT; + } + NewMBB = SplitBlockBeforeInstr(MI); } /// HandleConstantPoolUser - Analyze the specified user, checking to see if it /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise.
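As an aside, the rewritten WaterIsInRange / CreateNewWater logic above leans on a handful of alignment helpers (UnknownPadding, WorstCaseAlign, OffsetToAlignment, RoundUpToAlignment). Below is a minimal standalone C++ sketch of that padding arithmetic, assuming offsets are byte counts and LogAlign is log2 of the alignment in bytes; the lowercase helper names are local to this sketch and are an illustration of the idea, not the exact implementations in ARMConstantIslands.cpp or Support/MathExtras.h.

#include <cassert>

// Bytes needed to advance Offset to the next 2^LogAlign boundary.
static unsigned offsetToAlignment(unsigned Offset, unsigned LogAlign) {
  unsigned Align = 1u << LogAlign;
  return (Align - (Offset & (Align - 1))) & (Align - 1);
}

// If only the low KnownBits bits of an offset are exact, aligning to a
// 2^LogAlign boundary may insert up to (2^LogAlign - 2^KnownBits) extra bytes.
static unsigned unknownPadding(unsigned LogAlign, unsigned KnownBits) {
  return KnownBits < LogAlign ? (1u << LogAlign) - (1u << KnownBits) : 0;
}

// Pessimistic offset after aligning to 2^LogAlign: add the padding the
// unknown high bits could force, then round up to the boundary.
static unsigned worstCaseAlign(unsigned Offset, unsigned LogAlign,
                               unsigned KnownBits) {
  Offset += unknownPadding(LogAlign, KnownBits);
  return Offset + offsetToAlignment(Offset, LogAlign);
}

int main() {
  // Example: a nominal offset of 0x61 whose low bit alone is known, aligned
  // to 4 bytes (LogAlign = 2), may slip by up to 2 bytes of unknown padding
  // and then rounds up to 0x64 in the worst case.
  assert(unknownPadding(2, 1) == 2);
  assert(worstCaseAlign(0x61, 2, 1) == 0x64);
  return 0;
}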
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, - unsigned CPUserIndex) { +bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; @@ -1260,11 +1358,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, unsigned ID = AFI->createPICLabelUId(); // Look for water where we can place this CPE. - MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); + MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); MachineBasicBlock *NewMBB; water_iterator IP; if (LookForWater(U, UserOffset, IP)) { - DEBUG(errs() << "found water in range\n"); + DEBUG(dbgs() << "Found water in range\n"); MachineBasicBlock *WaterBB = *IP; // If the original WaterList entry was "new water" on this iteration, @@ -1279,7 +1377,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, } else { // No water found. - DEBUG(errs() << "No water found\n"); + DEBUG(dbgs() << "No water found\n"); CreateNewWater(CPUserIndex, UserOffset, NewMBB); // SplitBlockBeforeInstr adds to WaterList, which is important when it is @@ -1304,7 +1402,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, WaterList.erase(IP); // Okay, we know we can put an island before NewMBB now, do it! - MF.insert(NewMBB, NewIsland); + MF->insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. UpdateForInsertedWaterBlock(NewIsland); @@ -1320,13 +1418,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); ++NumCPEs; - BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; - // Compensate for .align 2 in thumb mode. - if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm)) - Size += 2; + // Mark the basic block as aligned as required by the const-pool entry. + NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); + // Increase the size of the island block to account for the new entry. - BBSizes[NewIsland->getNumber()] += Size; - AdjustBBOffsetsAfter(NewIsland, Size); + BBInfo[NewIsland->getNumber()].Size += Size; + AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1335,8 +1432,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, break; } - DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI - << '\t' << *UserMI); + DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI + << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset)); return true; } @@ -1347,19 +1444,18 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { MachineBasicBlock *CPEBB = CPEMI->getParent(); unsigned Size = CPEMI->getOperand(2).getImm(); CPEMI->eraseFromParent(); - BBSizes[CPEBB->getNumber()] -= Size; + BBInfo[CPEBB->getNumber()].Size -= Size; // All succeeding offsets have the current size value added in, fix this. if (CPEBB->empty()) { - // In thumb1 mode, the size of island may be padded by two to compensate for - // the alignment requirement. Then it will now be 2 when the block is - // empty, so fix this. - // All succeeding offsets have the current size value added in, fix this. 
- if (BBSizes[CPEBB->getNumber()] != 0) { - Size += BBSizes[CPEBB->getNumber()]; - BBSizes[CPEBB->getNumber()] = 0; - } - } - AdjustBBOffsetsAfter(CPEBB, -Size); + BBInfo[CPEBB->getNumber()].Size = 0; + + // This block no longer needs to be aligned. <rdar://problem/10534709>. + CPEBB->setAlignment(0); + } else + // Entries are sorted by descending alignment, so realign from the front. + CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); + + AdjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if // this BB's predecessor jumps directly to this BB's successor. This // shouldn't happen currently. @@ -1390,9 +1486,9 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned MaxDisp) { unsigned PCAdj = isThumb ? 4 : 8; unsigned BrOffset = GetOffsetOf(MI) + PCAdj; - unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; - DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber() + DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() << " from BB#" << MI->getParent()->getNumber() << " max delta=" << MaxDisp << " from " << GetOffsetOf(MI) << " to " << DestOffset @@ -1411,7 +1507,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, /// FixUpImmediateBr - Fix up an immediate branch whose destination is too far /// away to fit in its displacement field. -bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { +bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1420,8 +1516,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { return false; if (!Br.isCond) - return FixUpUnconditionalBr(MF, Br); - return FixUpConditionalBr(MF, Br); + return FixUpUnconditionalBr(Br); + return FixUpConditionalBr(Br); } /// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is @@ -1429,7 +1525,7 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { /// spilled in the epilogue, then we can use BL to implement a far jump. /// Otherwise, add an intermediate branch instruction to a branch. bool -ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { +ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); if (!isThumb1) @@ -1438,12 +1534,12 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { // Use BL to implement far jump. Br.MaxDisp = (1 << 21) * 2; MI->setDesc(TII->get(ARM::tBfar)); - BBSizes[MBB->getNumber()] += 2; - AdjustBBOffsetsAfter(MBB, 2); + BBInfo[MBB->getNumber()].Size += 2; + AdjustBBOffsetsAfter(MBB); HasFarJump = true; ++NumUBrFixed; - DEBUG(errs() << " Changed B to long jump " << *MI); + DEBUG(dbgs() << " Changed B to long jump " << *MI); return true; } @@ -1452,7 +1548,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { /// far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. 
bool -ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { +ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1487,7 +1583,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { // b L1 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { - DEBUG(errs() << " Invert Bcc condition and swap its destination with " + DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " << *BMI); BMI->getOperand(0).setMBB(DestBB); MI->getOperand(0).setMBB(NewDest); @@ -1502,15 +1598,13 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { // No need for the branch to the next block. We're adding an unconditional // branch to the destination. int delta = TII->GetInstSizeInBytes(&MBB->back()); - BBSizes[MBB->getNumber()] -= delta; - MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB)); - AdjustBBOffsetsAfter(SplitBB, -delta); + BBInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); - // BBOffsets[SplitBB] is wrong temporarily, fixed below + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); - DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber() + DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. to BB#" << NextBB->getNumber() << "\n"); @@ -1519,23 +1613,20 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) .addMBB(NextBB).addImm(CC).addReg(CCReg); Br.MI = &MBB->back(); - BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); if (isThumb) BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) .addImm(ARMCC::AL).addReg(0); else BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); - BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); // Remove the old conditional branch. It may or may not still be in MBB. - BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI); + BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); MI->eraseFromParent(); - - // The net size change is an addition of one unconditional branch. - int delta = TII->GetInstSizeInBytes(&MBB->back()); - AdjustBBOffsetsAfter(MBB, delta); + AdjustBBOffsetsAfter(MBB); return true; } @@ -1561,7 +1652,7 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { +bool ARMConstantIslands::OptimizeThumb2Instructions() { bool MadeChange = false; // Shrink ADR and LDR from constantpool. 
@@ -1598,19 +1689,19 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { U.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = U.MI->getParent(); - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + AdjustBBOffsetsAfter(MBB); ++NumT2CPShrunk; MadeChange = true; } } - MadeChange |= OptimizeThumb2Branches(MF); - MadeChange |= OptimizeThumb2JumpTables(MF); + MadeChange |= OptimizeThumb2Branches(); + MadeChange |= OptimizeThumb2JumpTables(); return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { +bool ARMConstantIslands::OptimizeThumb2Branches() { bool MadeChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { @@ -1639,8 +1730,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { Br.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = Br.MI->getParent(); - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + AdjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; MadeChange = true; } @@ -1663,7 +1754,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; - unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { MachineBasicBlock::iterator CmpMI = Br.MI; if (CmpMI != Br.MI->getParent()->begin()) { @@ -1681,8 +1772,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { CmpMI->eraseFromParent(); Br.MI->eraseFromParent(); Br.MI = NewBR; - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + AdjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; } @@ -1696,12 +1787,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { /// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. -bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { +bool ARMConstantIslands::OptimizeThumb2JumpTables() { bool MadeChange = false; // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? - MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -1709,7 +1800,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1720,7 +1811,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { MachineBasicBlock *MBB = JTBBs[j]; - unsigned DstOffset = BBOffsets[MBB->getNumber()]; + unsigned DstOffset = BBInfo[MBB->getNumber()].Offset; // Negative offset is not ok. 
FIXME: We should change BB layout to make // sure all the branches are forward. if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2) @@ -1808,8 +1899,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { MI->eraseFromParent(); int delta = OrigSize - NewSize; - BBSizes[MBB->getNumber()] -= delta; - AdjustBBOffsetsAfter(MBB, -delta); + BBInfo[MBB->getNumber()].Size -= delta; + AdjustBBOffsetsAfter(MBB); ++NumTBs; MadeChange = true; @@ -1821,10 +1912,10 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { /// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that /// jump tables always branch forwards, since that's what tbb and tbh need. -bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { +bool ARMConstantIslands::ReorderThumb2JumpTables() { bool MadeChange = false; - MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -1832,7 +1923,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1864,8 +1955,6 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineBasicBlock *ARMConstantIslands:: AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { - MachineFunction &MF = *BB->getParent(); - // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple @@ -1882,22 +1971,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) // If the block ends in an unconditional branch, move it. The prior block // has to have an analyzable terminator for us to move this one. Be paranoid // and make sure we're not trying to move the entry block of the function. - if (!B && Cond.empty() && BB != MF.begin() && + if (!B && Cond.empty() && BB != MF->begin() && !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { BB->moveAfter(JTBB); OldPrior->updateTerminator(); BB->updateTerminator(); // Update numbering to account for the block being moved. - MF.RenumberBlocks(); + MF->RenumberBlocks(); ++NumJTMoved; return NULL; } // Create a new MBB for the code after the jump BB. MachineBasicBlock *NewBB = - MF.CreateMachineBasicBlock(JTBB->getBasicBlock()); + MF->CreateMachineBasicBlock(JTBB->getBasicBlock()); MachineFunction::iterator MBBI = JTBB; ++MBBI; - MF.insert(MBBI, NewBB); + MF->insert(MBBI, NewBB); // Add an unconditional branch from NewBB to BB. // There doesn't seem to be meaningful DebugInfo available; this doesn't @@ -1907,7 +1996,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) .addImm(ARMCC::AL).addReg(0); // Update internal data structures to account for the newly inserted MBB. - MF.RenumberBlocks(NewBB); + MF->RenumberBlocks(NewBB); // Update the CFG. 
NewBB->addSuccessor(BB); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index fc464ea..01d772d 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -61,7 +61,7 @@ namespace { void ExpandVST(MachineBasicBlock::iterator &MBBI); void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); void ExpandVTBL(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool IsExt, unsigned NumRegs); + unsigned Opc, bool IsExt); void ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); }; @@ -129,12 +129,15 @@ namespace { } static const NEONLdStTableEntry NEONLdStTable[] = { -{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,true}, -{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, true, SingleSpc, 2, 4,true}, -{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,true}, -{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, true, SingleSpc, 2, 2,true}, -{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,true}, -{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, true, SingleSpc, 2, 8,true}, +{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false}, +{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false}, +{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false}, +{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false}, +{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, +{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false}, +{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false}, +{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, +{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false}, { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, @@ -177,18 +180,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, { ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, { ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, { ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, true, SingleSpc, 2, 8 ,false}, +{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false}, { ARM::VLD2q16Pseudo, 
ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, -{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, -{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false}, { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false}, -{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, true, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false}, { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, @@ -267,10 +276,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, { ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, -{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,true}, -{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, true, SingleSpc, 4, 1 ,true}, -{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true}, -{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true}, +{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false}, +{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, +{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, { ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false}, { ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, @@ -296,19 +307,25 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, -{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,true}, -{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, -{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,true}, -{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, -{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,true}, -{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, - -{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,true}, -{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, -{ 
ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,true}, -{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, -{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,true}, -{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, +{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false}, +{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, + +{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, @@ -620,7 +637,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ /// register operands to real instructions with D register operands. void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool IsExt, unsigned NumRegs) { + unsigned Opc, bool IsExt) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); @@ -636,11 +653,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0).addReg(D1); - if (NumRegs > 2) - MIB.addReg(D2); - if (NumRegs > 3) - MIB.addReg(D3); + MIB.addReg(D0); // Copy the other source register operand. 
MIB.addOperand(MI.getOperand(OpIdx++)); @@ -1090,12 +1103,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8Pseudo_UPD: - case ARM::VLD2d16Pseudo_UPD: - case ARM::VLD2d32Pseudo_UPD: - case ARM::VLD2q8Pseudo_UPD: - case ARM::VLD2q16Pseudo_UPD: - case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD2d8PseudoWB_fixed: + case ARM::VLD2d16PseudoWB_fixed: + case ARM::VLD2d32PseudoWB_fixed: + case ARM::VLD2q8PseudoWB_fixed: + case ARM::VLD2q16PseudoWB_fixed: + case ARM::VLD2q32PseudoWB_fixed: + case ARM::VLD2d8PseudoWB_register: + case ARM::VLD2d16PseudoWB_register: + case ARM::VLD2d32PseudoWB_register: + case ARM::VLD2q8PseudoWB_register: + case ARM::VLD2q16PseudoWB_register: + case ARM::VLD2q32PseudoWB_register: case ARM::VLD3d8Pseudo: case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: @@ -1131,9 +1150,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD1DUPq8Pseudo: case ARM::VLD1DUPq16Pseudo: case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8Pseudo_UPD: - case ARM::VLD1DUPq16Pseudo_UPD: - case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD1DUPq8PseudoWB_fixed: + case ARM::VLD1DUPq16PseudoWB_fixed: + case ARM::VLD1DUPq32PseudoWB_fixed: + case ARM::VLD1DUPq8PseudoWB_register: + case ARM::VLD1DUPq16PseudoWB_register: + case ARM::VLD1DUPq32PseudoWB_register: case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: @@ -1173,12 +1195,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case ARM::VST2q32Pseudo: - case ARM::VST2d8Pseudo_UPD: - case ARM::VST2d16Pseudo_UPD: - case ARM::VST2d32Pseudo_UPD: - case ARM::VST2q8Pseudo_UPD: - case ARM::VST2q16Pseudo_UPD: - case ARM::VST2q32Pseudo_UPD: + case ARM::VST2d8PseudoWB_fixed: + case ARM::VST2d16PseudoWB_fixed: + case ARM::VST2d32PseudoWB_fixed: + case ARM::VST2q8PseudoWB_fixed: + case ARM::VST2q16PseudoWB_fixed: + case ARM::VST2q32PseudoWB_fixed: + case ARM::VST2d8PseudoWB_register: + case ARM::VST2d16PseudoWB_register: + case ARM::VST2d32PseudoWB_register: + case ARM::VST2q8PseudoWB_register: + case ARM::VST2q16PseudoWB_register: + case ARM::VST2q32PseudoWB_register: case ARM::VST3d8Pseudo: case ARM::VST3d16Pseudo: case ARM::VST3d32Pseudo: @@ -1186,7 +1214,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST3d8Pseudo_UPD: case ARM::VST3d16Pseudo_UPD: case ARM::VST3d32Pseudo_UPD: - case ARM::VST1d64TPseudo_UPD: + case ARM::VST1d64TPseudoWB_fixed: + case ARM::VST1d64TPseudoWB_register: case ARM::VST3q8Pseudo_UPD: case ARM::VST3q16Pseudo_UPD: case ARM::VST3q32Pseudo_UPD: @@ -1203,7 +1232,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST4d8Pseudo_UPD: case ARM::VST4d16Pseudo_UPD: case ARM::VST4d32Pseudo_UPD: - case ARM::VST1d64QPseudo_UPD: + case ARM::VST1d64QPseudoWB_fixed: + case ARM::VST1d64QPseudoWB_register: case ARM::VST4q8Pseudo_UPD: case ARM::VST4q16Pseudo_UPD: case ARM::VST4q32Pseudo_UPD: @@ -1291,12 +1321,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true; - case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true; - case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true; - case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true; - case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true; - case ARM::VTBX4Pseudo: 
ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true; + case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true; + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; + case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; + case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true; + case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; + case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; } return false; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 9bae422..a98dfc3 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -178,10 +178,12 @@ class ARMFastISel : public FastISel { bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt, - bool allocReg); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, + unsigned Alignment = 0, bool isZExt = true, + bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); + bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); bool ARMIsMemCpySmall(uint64_t Len); @@ -227,8 +229,7 @@ class ARMFastISel : public FastISel { // we don't care about implicit defs here, just places we'll need to add a // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.hasOptionalDef()) + if (!MI->hasOptionalDef()) return false; // Look to see if our OptionalDef is defining CPSR or CCR. @@ -702,7 +703,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { TargetRegisterClass* RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(SI->second) .addImm(0)); @@ -898,7 +899,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { ARM::GPRRegisterClass; unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(Addr.Base.FI) .addImm(0)); @@ -937,7 +938,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addFrameIndex(FI); - // ARM halfword load/stores and signed byte loads need an additional operand. + // ARM halfword load/stores and signed byte loads need an additional + // operand. if (useAM3) { signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; MIB.addReg(0); @@ -950,7 +952,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - // ARM halfword load/stores and signed byte loads need an additional operand. + // ARM halfword load/stores and signed byte loads need an additional + // operand. if (useAM3) { signed Imm = (Addr.Offset < 0) ? 
(0x100 | -Addr.Offset) : Addr.Offset; MIB.addReg(0); @@ -963,10 +966,11 @@ } bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, - bool isZExt = true, bool allocReg = true) { + unsigned Alignment, bool isZExt, bool allocReg) { assert(VT.isSimple() && "Non-simple types are invalid here!"); unsigned Opc; bool useAM3 = false; + bool needVMOV = false; TargetRegisterClass *RC; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. @@ -1012,10 +1016,25 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, RC = ARM::GPRRegisterClass; break; case MVT::f32: - Opc = ARM::VLDRS; - RC = TLI.getRegClassFor(VT); + if (!Subtarget->hasVFP2()) return false; + // Unaligned loads need special handling. Floats require word-alignment. + if (Alignment && Alignment < 4) { + needVMOV = true; + VT = MVT::i32; + Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; + RC = ARM::GPRRegisterClass; + } else { + Opc = ARM::VLDRS; + RC = TLI.getRegClassFor(VT); + } break; case MVT::f64: + if (!Subtarget->hasVFP2()) return false; + // FIXME: Unaligned loads need special handling. Doublewords require + // word-alignment. + if (Alignment && Alignment < 4) + return false; + Opc = ARM::VLDRD; RC = TLI.getRegClassFor(VT); break; @@ -1030,6 +1049,16 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3); + + // If we had an unaligned load of a float we've converted it to a regular + // load. Now we must move from the GPR to the FP register. + if (needVMOV) { + unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVSR), MoveReg) + .addReg(ResultReg)); + ResultReg = MoveReg; + } return true; } @@ -1048,12 +1077,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; unsigned ResultReg; - if (!ARMEmitLoad(VT, ResultReg, Addr)) return false; + if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) + return false; UpdateValueMap(I, ResultReg); return true; } -bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { +bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment) { unsigned StrOpc; bool useAM3 = false; switch (VT.getSimpleVT().SimpleTy) { @@ -1101,10 +1132,26 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; - StrOpc = ARM::VSTRS; + // Unaligned stores need special handling. Floats require word-alignment. + if (Alignment && Alignment < 4) { + unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVRS), MoveReg) + .addReg(SrcReg)); + SrcReg = MoveReg; + VT = MVT::i32; + StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12; + } else { + StrOpc = ARM::VSTRS; + } break; case MVT::f64: if (!Subtarget->hasVFP2()) return false; + // FIXME: Unaligned stores need special handling. Doublewords require + // word-alignment.
+ if (Alignment && Alignment < 4) + return false; + StrOpc = ARM::VSTRD; break; } @@ -1141,7 +1188,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) { if (!ARMComputeAddress(I->getOperand(1), Addr)) return false; - if (!ARMEmitStore(VT, SrcReg, Addr)) return false; + if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment())) + return false; return true; } @@ -1360,7 +1408,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, unsigned SrcReg1 = getRegForValue(Src1Value); if (SrcReg1 == 0) return false; - unsigned SrcReg2; + unsigned SrcReg2 = 0; if (!UseImm) { SrcReg2 = getRegForValue(Src2Value); if (SrcReg2 == 0) return false; @@ -1577,7 +1625,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { (ARM_AM::getSOImmVal(Imm) != -1); } - unsigned Op2Reg; + unsigned Op2Reg = 0; if (!UseImm) { Op2Reg = getRegForValue(I->getOperand(2)); if (Op2Reg == 0) return false; @@ -1716,7 +1764,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { // Use target triple & subtarget features to do actual dispatch. if (Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard) + TM.Options.FloatABIType == FloatABI::Hard) return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); else return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); @@ -1765,21 +1813,23 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::SExt: { - EVT DestVT = VA.getLocVT(); + MVT DestVT = VA.getLocVT(); unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false); assert (ResultReg != 0 && "Failed to emit a sext"); Arg = ResultReg; + ArgVT = DestVT; break; } case CCValAssign::AExt: // Intentional fall-through. Handle AExt and ZExt. case CCValAssign::ZExt: { - EVT DestVT = VA.getLocVT(); + MVT DestVT = VA.getLocVT(); unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); assert (ResultReg != 0 && "Failed to emit a sext"); Arg = ResultReg; + ArgVT = DestVT; break; } case CCValAssign::BCvt: { @@ -2456,7 +2506,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; unsigned ResultReg = MI->getOperand(0).getReg(); - if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false)) + if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) return false; MI->eraseFromParent(); return true; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2d1de6f..06944b1 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -37,7 +37,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // Always eliminate non-leaf frame pointers. 
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || + return ((MF.getTarget().Options.DisableFramePointerElim(MF) && + MFI->hasCalls()) || RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -309,8 +310,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getDesc().isReturn() && - "Can only insert epilog into returning blocks"); + assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 787f6a2..a5fd15b 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, // FIXME: Detect integer instructions properly. const MCInstrDesc &MCID = MI->getDesc(); unsigned Domain = MCID.TSFlags & ARMII::DomainMask; - if (MCID.mayStore()) + if (MI->mayStore()) return false; unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) @@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { MachineInstr *MI = SU->getInstr(); if (!MI->isDebugValue()) { - if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1]) - return Hazard; - // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following // a VMLA / VMLS will cause 4 cycle stall. const MCInstrDesc &MCID = MI->getDesc(); @@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { MachineInstr *DefMI = LastMI; const MCInstrDesc &LastMCID = LastMI->getDesc(); // Skip over one non-VFP / NEON instruction. - if (!LastMCID.isBarrier() && + if (!LastMI->isBarrier() && // On A9, AGU and NEON/FPU are muxed. - !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) && + !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { @@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { void ARMHazardRecognizer::Reset() { LastMI = 0; FpMLxStalls = 0; - ITBlockSize = 0; ScoreboardHazardRecognizer::Reset(); } void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { MachineInstr *MI = SU->getInstr(); - unsigned Opcode = MI->getOpcode(); - if (ITBlockSize) { - --ITBlockSize; - } else if (Opcode == ARM::t2IT) { - unsigned Mask = MI->getOperand(1).getImm(); - unsigned NumTZ = CountTrailingZeros_32(Mask); - assert(NumTZ <= 3 && "Invalid IT mask!"); - ITBlockSize = 4 - NumTZ; - MachineBasicBlock::iterator I = MI; - for (unsigned i = 0; i < ITBlockSize; ++i) { - // Advance to the next instruction, skipping any dbg_value instructions. 
- do { - ++I; - } while (I->isDebugValue()); - ITBlockMIs[ITBlockSize-1-i] = &*I; - } - } - if (!MI->isDebugValue()) { LastMI = MI; FpMLxStalls = 0; diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index 2bc218d..98bfc4c 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -23,6 +23,10 @@ class ARMBaseRegisterInfo; class ARMSubtarget; class MachineInstr; +/// ARMHazardRecognizer handles special constraints that are not expressed in +/// the scheduling itinerary. This is only used during postRA scheduling. The +/// ARM preRA scheduler uses an unspecialized instance of the +/// ScoreboardHazardRecognizer. class ARMHazardRecognizer : public ScoreboardHazardRecognizer { const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; @@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer { MachineInstr *LastMI; unsigned FpMLxStalls; - unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. - MachineInstr *ITBlockMIs[4]; public: ARMHazardRecognizer(const InstrItineraryData *ItinData, @@ -40,7 +42,7 @@ public: const ARMSubtarget &sti, const ScheduleDAG *DAG) : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), - TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {} + TRI(tri), STI(sti), LastMI(0) {} virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void Reset(); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index bc8588f..7473141 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1579,6 +1579,22 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register; case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register; case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register; + case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; + case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; + + case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register; + case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register; + case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register; + case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; + case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; + case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; + + case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register; + case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register; + case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register; + case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; + case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; + case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; } return Opc; // If not one we handle, return it unchanged. } @@ -1646,13 +1662,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0 + // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. 
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode())) + if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); - // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so + // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs != 1 && Opc != ARM::VLD1q64PseudoWB_fixed) || + if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } @@ -1796,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0 + // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode())) + if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. @@ -2810,10 +2826,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2_UPD: { - unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD, - ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; - unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD, - ARM::VLD2q32Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed, + ARM::VLD2d16PseudoWB_fixed, + ARM::VLD2d32PseudoWB_fixed, + ARM::VLD1q64PseudoWB_fixed}; + unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, + ARM::VLD2q16PseudoWB_fixed, + ARM::VLD2q32PseudoWB_fixed }; return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); } @@ -2876,16 +2895,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VST2_UPD: { - unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD, - ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed}; - unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD, - ARM::VST2q32Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed, + ARM::VST2d16PseudoWB_fixed, + ARM::VST2d32PseudoWB_fixed, + ARM::VST1q64PseudoWB_fixed}; + unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, + ARM::VST2q16PseudoWB_fixed, + ARM::VST2q32PseudoWB_fixed }; return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); } case ARMISD::VST3_UPD: { unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD, - ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD }; + ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, ARM::VST3q16Pseudo_UPD, ARM::VST3q32Pseudo_UPD }; @@ -2897,7 +2919,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VST4_UPD: { unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, - ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD }; + ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, ARM::VST4q16Pseudo_UPD, ARM::VST4q32Pseudo_UPD }; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8c4c06f..c6c1f5b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -72,7 +72,7 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for 
debugging only)"), cl::init(true)); -namespace llvm { +namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, @@ -432,7 +432,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else addRegisterClass(MVT::i32, ARM::GPRRegisterClass); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, ARM::SPRRegisterClass); if (!Subtarget->isFPOnlySP()) addRegisterClass(MVT::f64, ARM::DPRRegisterClass); @@ -467,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. + // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively + // supported for v4f32. setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + // FIXME: Code duplication: FDIV and FREM are expanded always, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); + // FIXME: Create unittest. + // In another words, find a way when "copysign" appears in DAG with vector + // operands. setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); + // FIXME: Code duplication: SETCC has custom operation action, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::SETCC, MVT::v2f64, Expand); + // FIXME: Create unittest for FNEG and for FABS. setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); @@ -486,11 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); setOperationAction(ISD::FEXP, MVT::v2f64, Expand); setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); + // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); + setOperationAction(ISD::FSIN, MVT::v4f32, Expand); + setOperationAction(ISD::FCOS, MVT::v4f32, Expand); + setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); + setOperationAction(ISD::FPOW, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); @@ -586,6 +609,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); + // These just redirect to CTTZ and CTLZ on ARM. + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); + // Only ARMv6 has BSWAP. 
if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); @@ -674,7 +701,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BITCAST, MVT::i64, Custom); @@ -712,7 +740,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); } @@ -723,7 +752,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FMA, MVT::f32, Expand); // Various VFP goodness - if (!UseSoftFloat && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { // int <-> fp are custom expanded into bit_convert + ARMISD ops. if (Subtarget->hasVFP2()) { setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); @@ -751,7 +780,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setStackPointerRegisterToSaveRestore(ARM::SP); - if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) + if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() || + !Subtarget->hasVFP2()) setSchedulingPreference(Sched::RegPressure); else setSchedulingPreference(Sched::Hybrid); @@ -1092,7 +1122,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, if (!Subtarget->isAAPCS_ABI()) return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); else if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard && !isVarArg) + getTargetMachine().Options.FloatABIType == FloatABI::Hard && + !isVarArg) return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } @@ -2951,7 +2982,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - if (UnsafeFPMath && + if (getTargetMachine().Options.UnsafeFPMath && (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE || CC == ISD::SETUNE)) { SDValue Result = OptimizeVFPBrcond(Op, DAG); @@ -3978,9 +4009,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } // Use vmov.f32 to materialize other v2f32 and v4f32 splats. - if (VT == MVT::v2f32 || VT == MVT::v4f32) { - ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0)); - int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF()); + if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { + int ImmVal = ARM_AM::getFP32Imm(SplatBits); if (ImmVal != -1) { SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); @@ -6010,7 +6040,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { // executed. 
for (MachineBasicBlock::reverse_iterator II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { - if (!II->getDesc().isCall()) continue; + if (!II->isCall()) continue; DenseMap<unsigned, bool> DefRegs; for (MachineInstr::mop_iterator @@ -6421,13 +6451,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - const MCInstrDesc *MCID = &MI->getDesc(); - if (!MCID->hasPostISelHook()) { + if (!MI->hasPostISelHook()) { assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); return; } + const MCInstrDesc *MCID = &MI->getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional // operand is still set to noreg. If needed, set the optional operand's @@ -6454,7 +6484,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. - if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { + if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { assert(!NewOpc && "Optional cc_out operand required"); return; } @@ -7948,7 +7978,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, // will return -0, so vmin can only be used for unsafe math or if one of // the operands is known to be nonzero. if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && - !UnsafeFPMath && + !DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; @@ -7970,7 +8000,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, // will return +0, so vmax can only be used for unsafe math or if one of // the operands is known to be nonzero. if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && - !UnsafeFPMath && + !DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; Opcode = IsReversed ? 
ARMISD::FMIN : ARMISD::FMAX; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 6940156..80f3773 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -201,21 +201,29 @@ def msr_mask : Operand<i32> { // 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0> // 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0> // 64 64 - <imm> is encoded in imm6<5:0> +def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; } def shr_imm8 : Operand<i32> { let EncoderMethod = "getShiftRight8Imm"; let DecoderMethod = "DecodeShiftRight8Imm"; + let ParserMatchClass = shr_imm8_asm_operand; } +def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; } def shr_imm16 : Operand<i32> { let EncoderMethod = "getShiftRight16Imm"; let DecoderMethod = "DecodeShiftRight16Imm"; + let ParserMatchClass = shr_imm16_asm_operand; } +def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; } def shr_imm32 : Operand<i32> { let EncoderMethod = "getShiftRight32Imm"; let DecoderMethod = "DecodeShiftRight32Imm"; + let ParserMatchClass = shr_imm32_asm_operand; } +def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; } def shr_imm64 : Operand<i32> { let EncoderMethod = "getShiftRight64Imm"; let DecoderMethod = "DecodeShiftRight64Imm"; + let ParserMatchClass = shr_imm64_asm_operand; } //===----------------------------------------------------------------------===// @@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>; class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>; +class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>; + + +class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>, + Requires<[HasVFP2]>; +class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>, + Requires<[HasNEON]>; //===----------------------------------------------------------------------===// // ARM Instruction templates. @@ -1994,73 +2010,111 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> { // VFP/NEON Instruction aliases for type suffices. 
class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> : - InstAlias<!strconcat(opc, dt, asm), Result>; -multiclass VFPDT8ReqInstAlias<string opc, string asm, dag Result> { - def I8 : VFPDataTypeInstAlias<opc, ".i8", asm, Result>; - def S8 : VFPDataTypeInstAlias<opc, ".s8", asm, Result>; - def U8 : VFPDataTypeInstAlias<opc, ".u8", asm, Result>; - def F8 : VFPDataTypeInstAlias<opc, ".p8", asm, Result>; -} -// VFPDT8ReqInstAlias plus plain ".8" -multiclass VFPDT8InstAlias<string opc, string asm, dag Result> { - def _8 : VFPDataTypeInstAlias<opc, ".8", asm, Result>; - defm : VFPDT8ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT16ReqInstAlias<string opc, string asm, dag Result> { - def I16 : VFPDataTypeInstAlias<opc, ".i16", asm, Result>; - def S16 : VFPDataTypeInstAlias<opc, ".s16", asm, Result>; - def U16 : VFPDataTypeInstAlias<opc, ".u16", asm, Result>; - def F16 : VFPDataTypeInstAlias<opc, ".p16", asm, Result>; -} -// VFPDT16ReqInstAlias plus plain ".16" -multiclass VFPDT16InstAlias<string opc, string asm, dag Result> { - def _16 : VFPDataTypeInstAlias<opc, ".16", asm, Result>; - defm : VFPDT16ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT32ReqInstAlias<string opc, string asm, dag Result> { - def I32 : VFPDataTypeInstAlias<opc, ".i32", asm, Result>; - def S32 : VFPDataTypeInstAlias<opc, ".s32", asm, Result>; - def U32 : VFPDataTypeInstAlias<opc, ".u32", asm, Result>; - def F32 : VFPDataTypeInstAlias<opc, ".f32", asm, Result>; - def F : VFPDataTypeInstAlias<opc, ".f", asm, Result>; -} -// VFPDT32ReqInstAlias plus plain ".32" -multiclass VFPDT32InstAlias<string opc, string asm, dag Result> { - def _32 : VFPDataTypeInstAlias<opc, ".32", asm, Result>; - defm : VFPDT32ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT64ReqInstAlias<string opc, string asm, dag Result> { - def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>; - def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>; - def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>; - def F64 : VFPDataTypeInstAlias<opc, ".f64", asm, Result>; - def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>; -} -// VFPDT64ReqInstAlias plus plain ".64" -multiclass VFPDT64InstAlias<string opc, string asm, dag Result> { - def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>; - defm : VFPDT64ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> { - def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>; - def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>; - def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>; - def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>; -} -// VFPDT64ReqInstAlias plus plain ".64" -multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> { - def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>; - defm : VFPDT64ReqInstAlias<opc, asm, Result>; -} + InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>; + multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { - defm : VFPDT8InstAlias<opc, asm, Result>; - defm : VFPDT16InstAlias<opc, asm, Result>; - defm : VFPDT32InstAlias<opc, asm, Result>; - defm : VFPDT64InstAlias<opc, asm, Result>; -} -multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> { - defm : VFPDT8InstAlias<opc, asm, Result>; - defm : VFPDT16InstAlias<opc, asm, Result>; - defm : VFPDT32InstAlias<opc, asm, Result>; - defm : VFPDT64NoF64InstAlias<opc, asm, Result>; -} + def : VFPDataTypeInstAlias<opc, ".8", asm, Result>; + def : VFPDataTypeInstAlias<opc, 
".16", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".32", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; +} + +// The same alias classes using AsmPseudo instead, for the more complex +// stuff in NEON that InstAlias can't quite handle. +// Note that we can't use anonymous defm references here like we can +// above, as we care about the ultimate instruction enum names generated, unlike +// for instalias defs. +class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> : + AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>; +multiclass NEONDT8ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I8 : NEONDataTypeAsmPseudoInst<opc, ".i8", asm, iops>; + def S8 : NEONDataTypeAsmPseudoInst<opc, ".s8", asm, iops>; + def U8 : NEONDataTypeAsmPseudoInst<opc, ".u8", asm, iops>; + def P8 : NEONDataTypeAsmPseudoInst<opc, ".p8", asm, iops>; +} +// NEONDT8ReqAsmPseudoInst plus plain ".8" +multiclass NEONDT8AsmPseudoInst<string opc, string asm, dag iops> { + def _8 : NEONDataTypeAsmPseudoInst<opc, ".8", asm, iops>; + defm _ : NEONDT8ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT16ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I16 : NEONDataTypeAsmPseudoInst<opc, ".i16", asm, iops>; + def S16 : NEONDataTypeAsmPseudoInst<opc, ".s16", asm, iops>; + def U16 : NEONDataTypeAsmPseudoInst<opc, ".u16", asm, iops>; + def P16 : NEONDataTypeAsmPseudoInst<opc, ".p16", asm, iops>; +} +// NEONDT16ReqAsmPseudoInst plus plain ".16" +multiclass NEONDT16AsmPseudoInst<string opc, string asm, dag iops> { + def _16 : NEONDataTypeAsmPseudoInst<opc, ".16", asm, iops>; + defm _ : NEONDT16ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT32ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I32 : NEONDataTypeAsmPseudoInst<opc, ".i32", asm, iops>; + def S32 : NEONDataTypeAsmPseudoInst<opc, ".s32", asm, iops>; + def U32 : NEONDataTypeAsmPseudoInst<opc, ".u32", asm, iops>; + def F32 : NEONDataTypeAsmPseudoInst<opc, ".f32", asm, iops>; + def F : NEONDataTypeAsmPseudoInst<opc, ".f", asm, iops>; +} +// NEONDT32ReqAsmPseudoInst plus plain ".32" +multiclass NEONDT32AsmPseudoInst<string opc, string asm, dag iops> { + def _32 : NEONDataTypeAsmPseudoInst<opc, ".32", asm, iops>; + defm _ : NEONDT32ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT64ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; + def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; + def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; + def F64 : NEONDataTypeAsmPseudoInst<opc, ".f64", asm, iops>; + def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; +} +// NEONDT64ReqAsmPseudoInst plus plain ".64" +multiclass NEONDT64AsmPseudoInst<string opc, string asm, dag iops> { + def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; + defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT64NoF64ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; + def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; + def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; + def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; +} +// NEONDT64ReqAsmPseudoInst plus plain ".64" +multiclass NEONDT64NoF64AsmPseudoInst<string opc, string asm, dag iops> { + def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; + defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDTAnyAsmPseudoInst<string 
opc, string asm, dag iops> { + defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT64AsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDTAnyNoF64AsmPseudoInst<string opc, string asm, dag iops> { + defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT64NoF64AsmPseudoInst<opc, asm, iops>; +} + +// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. +def : TokenAlias<".s8", ".i8">; +def : TokenAlias<".u8", ".i8">; +def : TokenAlias<".s16", ".i16">; +def : TokenAlias<".u16", ".i16">; +def : TokenAlias<".s32", ".i32">; +def : TokenAlias<".u32", ".i32">; +def : TokenAlias<".s64", ".i64">; +def : TokenAlias<".u64", ".i64">; + +def : TokenAlias<".i8", ".8">; +def : TokenAlias<".i16", ".16">; +def : TokenAlias<".i32", ".32">; +def : TokenAlias<".i64", ".64">; + +def : TokenAlias<".p8", ".8">; +def : TokenAlias<".p16", ".16">; + +def : TokenAlias<".f32", ".32">; +def : TokenAlias<".f64", ".64">; +def : TokenAlias<".f", ".f32">; +def : TokenAlias<".d", ".f64">; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index be03924..516a080 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -238,27 +238,23 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32); }]>; -/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. -def imm1_15 : ImmLeaf<i32, [{ - return (int32_t)Imm >= 1 && (int32_t)Imm < 16; -}]>; - /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. def imm16_31 : ImmLeaf<i32, [{ return (int32_t)Imm >= 16 && (int32_t)Imm < 32; }]>; -def so_imm_neg : - PatLeaf<(imm), [{ +def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } +def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1; - }], so_imm_neg_XFORM>; + }], so_imm_neg_XFORM> { + let ParserMatchClass = so_imm_neg_asmoperand; +} // Note: this pattern doesn't require an encoder method and such, as it's // only used on aliases (Pat<> and InstAlias<>). The actual encoding -// is handled by the destination instructions, which use t2_so_imm. +// is handled by the destination instructions, which use so_imm. def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } -def so_imm_not : - Operand<i32>, PatLeaf<(imm), [{ +def so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; }], so_imm_not_XFORM> { let ParserMatchClass = so_imm_not_asmoperand; @@ -512,6 +508,14 @@ def arm_i32imm : PatLeaf<(imm), [{ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); }]>; +/// imm0_1 predicate - Immediate in the range [0,1]. +def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; } +def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } + +/// imm0_3 predicate - Immediate in the range [0,3]. +def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } +def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } + /// imm0_7 predicate - Immediate in the range [0,7]. 
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ @@ -520,6 +524,42 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_7AsmOperand; } +/// imm8 predicate - Immediate is exactly 8. +def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; } +def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> { + let ParserMatchClass = Imm8AsmOperand; +} + +/// imm16 predicate - Immediate is exactly 16. +def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; } +def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> { + let ParserMatchClass = Imm16AsmOperand; +} + +/// imm32 predicate - Immediate is exactly 32. +def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; } +def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { + let ParserMatchClass = Imm32AsmOperand; +} + +/// imm1_7 predicate - Immediate in the range [1,7]. +def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; } +def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> { + let ParserMatchClass = Imm1_7AsmOperand; +} + +/// imm1_15 predicate - Immediate in the range [1,15]. +def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; } +def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> { + let ParserMatchClass = Imm1_15AsmOperand; +} + +/// imm1_31 predicate - Immediate in the range [1,31]. +def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; } +def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> { + let ParserMatchClass = Imm1_31AsmOperand; +} + /// imm0_15 predicate - Immediate in the range [0,15]. def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; } def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ @@ -544,6 +584,14 @@ def imm0_32 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_32AsmOperand; } +/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63]. +def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; } +def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 64; +}]> { + let ParserMatchClass = Imm0_63AsmOperand; +} + /// imm0_255 predicate - Immediate in the range [0,255]. def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; } def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { @@ -812,6 +860,9 @@ def addrmode6dup : Operand<i32>, let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6DupAddressOpValue"; + // FIXME: This is close, but not quite right. The alignment specifier is + // different. + let ParserMatchClass = AddrMode6AsmOperand; } // addrmodepc := pc + reg @@ -2753,23 +2804,25 @@ defm STRHT : AI3strT<0b1011, "strht">; // Load / store multiple Instructions. // -multiclass arm_ldst_mult<string asm, bit L_bit, Format f, +multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f, InstrItinClass itin, InstrItinClass itin_upd> { // IA is the default, so no need for an explicit suffix on the // mnemonic here. Without it is the canonical spelling.
def IA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def IA_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2778,16 +2831,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def DA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def DA_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2796,16 +2851,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def DB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def DB_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2814,16 +2871,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def IB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def IB_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2834,10 +2893,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let neverHasSideEffects = 1 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>; +defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m, + IIC_iLoad_mu>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>; +defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m, + IIC_iStore_mu>; } // 
neverHasSideEffects @@ -2851,6 +2912,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>, RegConstraint<"$Rn = $wb">; +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m, + IIC_iLoad_mu>; + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, + IIC_iStore_mu>; + + + //===----------------------------------------------------------------------===// // Move Instructions. // @@ -4999,6 +5070,32 @@ def : MnemonicAlias<"usubaddx", "usax">; // for isel. def : ARMInstAlias<"mov${s}${p} $Rd, $imm", (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", + (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; +// Same for AND <--> BIC +def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", + (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", + (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", + (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"and${s}${p} $Rdn, $imm", + (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; + +// Likewise, "add Rd, so_imm_neg" -> sub +def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm", + (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"add${s}${p} $Rd, $imm", + (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via so_imm_neg +def : ARMInstAlias<"cmp${p} $Rd, $imm", + (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; +def : ARMInstAlias<"cmn${p} $Rd, $imm", + (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; // The shifter forms of the MOV instruction are aliased to the ASR, LSL, // LSR, ROR, and RRX instructions. @@ -5056,4 +5153,8 @@ def : ARMInstAlias<"ror${s}${p} $Rn, $Rm", // 'mul' instruction can be specified with only two operands. def : ARMInstAlias<"mul${s}${p} $Rn, $Rm", - (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; + (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>; + +// "neg" is an alias for "rsb rd, rn, #0" +def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", + (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f2ca963..c40860d 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -74,9 +74,11 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ let MIOperandInfo = (ops i32imm); } +// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass { let Name = "VecListOneD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { let ParserMatchClass = VecListOneDAsmOperand; @@ -85,6 +87,7 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { def VecListTwoDAsmOperand : AsmOperandClass { let Name = "VecListTwoD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { let ParserMatchClass = VecListTwoDAsmOperand; @@ -93,6 +96,7 @@ def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { def VecListThreeDAsmOperand : AsmOperandClass { let Name = "VecListThreeD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> { let ParserMatchClass = VecListThreeDAsmOperand; @@ -101,6 +105,7 @@ def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> { def VecListFourDAsmOperand : AsmOperandClass { let Name = "VecListFourD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { let ParserMatchClass = VecListFourDAsmOperand; @@ -109,11 +114,92 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { def VecListTwoQAsmOperand : AsmOperandClass { let Name = "VecListTwoQ"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } -def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwo"> { +def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> { let ParserMatchClass = VecListTwoQAsmOperand; } +// Register list of one D register, with "all lanes" subscripting. +def VecListOneDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListOneDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> { + let ParserMatchClass = VecListOneDAllLanesAsmOperand; +} +// Register list of two D registers, with "all lanes" subscripting. +def VecListTwoDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListTwoDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> { + let ParserMatchClass = VecListTwoDAllLanesAsmOperand; +} + +// Register list of one D register, with byte lane subscripting. +def VecListOneDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListOneDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
+def VecListOneDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of two D registers, with byte lane subscripting. +def VecListTwoDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListTwoDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. //===----------------------------------------------------------------------===// @@ -272,12 +358,23 @@ class VLDQWBregisterPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr, rGPR:$offset), itin, "$addr.addr = $wb">; + class VLDQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; class VLDQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; +class VLDQQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; + + class VLDQQQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, "$src = $dst">; @@ -462,31 +559,23 @@ defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> +class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin> : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), - (ins addrmode6:$Rn), IIC_VLD2, - "vld2", Dt, "$Vd, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2Q<bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs VdTy:$Vd), - (ins addrmode6:$Rn), IIC_VLD2x2, + (ins addrmode6:$Rn), itin, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>; -def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", 
VecListTwoD>; -def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; -def VLD2q8 : VLD2Q<{0,0,?,?}, "8", VecListFourD>; -def VLD2q16 : VLD2Q<{0,1,?,?}, "16", VecListFourD>; -def VLD2q32 : VLD2Q<{1,0,?,?}, "32", VecListFourD>; +def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; +def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; +def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; @@ -497,47 +586,56 @@ def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; // ...with address register writeback: -class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, - "vld2", Dt, "$Vd, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2QWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, - "vld2", Dt, "$Vd, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy, InstrItinClass itin> { + def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn), itin, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), itin, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; -def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; -def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; -def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8", VecListFourD>; -def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>; -def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>; +defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; +defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; +defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; -def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; +def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; +def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; +def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; -def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; // ...with double-spaced registers -def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>; -def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -997,9 +1095,11 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; // VLD1DUP : Vector Load (single element to all lanes) class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn), - IIC_VLD1dup, "vld1", 
Dt, "\\{$Vd[]\\}, $Rn", "", - [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), + (ins addrmode6dup:$Rn), + IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", + [(set VecListOneDAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1025,9 +1125,9 @@ def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { class VLD1QDUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2), + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1038,32 +1138,63 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; // ...with address register writeback: -class VLD1DUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -class VLD1QDUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">; -def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">; -def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">; +defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; +defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; +defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; -def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">; -def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">; -def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">; +defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; +defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; +defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; -def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; +def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; +def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; +def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) class VLD2DUP<bits<4> op7_4, string Dt> @@ -1329,94 +1460,109 @@ def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; // ...with 3 registers class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + (ins addrmode6:$Rn, VecListThreeD:$Vd), + IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D3WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D3WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), + IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8T : VST1D3<{0,0,0,?}, "8">; -def VST1d16T : VST1D3<{0,1,0,?}, "16">; -def VST1d32T : VST1D3<{1,0,0,?}, "32">; -def VST1d64T : VST1D3<{1,1,0,?}, "64">; +def VST1d8T : VST1D3<{0,0,0,?}, "8">; +def VST1d16T : VST1D3<{0,1,0,?}, "16">; +def VST1d32T : VST1D3<{1,0,0,?}, "32">; +def VST1d64T : VST1D3<{1,1,0,?}, "64">; -def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">; -def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">; -def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">; -def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">; +defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; +defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; +defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; +defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; -def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; -def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; +def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; // ...with 4 registers class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", + (ins addrmode6:$Rn, VecListFourD:$Vd), + IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D4WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u, - "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D4WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8Q : VST1D4<{0,0,?,?}, "8">; -def VST1d16Q : VST1D4<{0,1,?,?}, "16">; -def VST1d32Q : VST1D4<{1,0,?,?}, "32">; -def VST1d64Q : VST1D4<{1,1,?,?}, "64">; +def VST1d8Q : VST1D4<{0,0,?,?}, "8">; +def VST1d16Q : VST1D4<{0,1,?,?}, "16">; +def VST1d32Q : VST1D4<{1,0,?,?}, "32">; +def VST1d64Q : VST1D4<{1,1,?,?}, "64">; -def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">; -def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">; -def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">; -def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">; +defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; +defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; +defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; +defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; -def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; -def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; +def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) -class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), - IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; -} -class VST2Q<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", - "", []> { +class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), + itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">; -def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">; -def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; -def VST2q8 : VST2Q<{0,0,?,?}, "8">; -def VST2q16 : VST2Q<{0,1,?,?}, "16">; -def VST2q32 : VST2Q<{1,0,?,?}, "32">; +def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; +def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; +def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; def VST2d8Pseudo : VSTQPseudo<IIC_VST2>; def VST2d16Pseudo : VSTQPseudo<IIC_VST2>; @@ -1427,47 +1573,76 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; // ...with address register writeback: -class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2), - IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, + 
RegisterOperand VdTy> { + def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -class VST2QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u, - "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST2QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">; -def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">; -def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; -def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">; -def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">; -def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">; +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; +def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; -def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; // ...with double-spaced registers -def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">; -def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">; -def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">; -def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">; -def 
VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">; -def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -1741,10 +1916,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>; // ...with address register writeback: class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, - "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", - "$addr.addr = $wb", []> { + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, + "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; } @@ -2573,9 +2748,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // Long shift by immediate. class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm, + (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm, IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; @@ -2805,14 +2980,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, v4i32, v4i32, OpNode, Commutable>; } -multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> { - def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"), - v2i32, ShOp>; - def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"), +multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { + def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; + def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", v4i32, v2i32, ShOp>; } @@ -3477,15 +3649,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, SDNode OpNode> { def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> { + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> { + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - 
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> { + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -3574,7 +3746,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; @@ -4285,18 +4457,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, - ValueType OpTy, SDNode OpNode> + ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, - ResTy, OpTy, OpNode> { + ResTy, OpTy, ImmTy, OpNode> { let Inst{21-16} = op21_16; let DecoderMethod = "DecodeVSHLMaxInstruction"; } def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", - v8i16, v8i8, NEONvshlli>; + v8i16, v8i8, imm8, NEONvshlli>; def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", - v4i32, v4i16, NEONvshlli>; + v4i32, v4i16, imm16, NEONvshlli>; def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", - v2i64, v2i32, NEONvshlli>; + v2i64, v2i32, imm32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", @@ -4469,10 +4641,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; def : InstAlias<"vmov${p} $Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm", - (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm", - (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VMOV : Vector Move (Immediate) @@ -4932,34 +5100,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>; // VEXT : Vector Extract -class VEXTd<string OpcodeStr, string Dt, ValueType Ty> +class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm, + (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), - (Ty DPR:$Vm), imm:$index)))]> { + (Ty DPR:$Vm), imm:$index)))]> { bits<4> index; let Inst{11-8} = index{3-0}; } -class VEXTq<string OpcodeStr, string Dt, ValueType Ty> +class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd), - (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm, + (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm, IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn), - (Ty QPR:$Vm), imm:$index)))]> { + (Ty QPR:$Vm), imm:$index)))]> { bits<4> index; let Inst{11-8} = index{3-0}; } -def VEXTd8 : VEXTd<"vext", "8", v8i8> { +def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { let Inst{11-8} = index{3-0}; } -def VEXTd16 : VEXTd<"vext", "16", v4i16> { +def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { let Inst{11-9} = index{2-0}; let Inst{8} = 0b0; } -def VEXTd32 : VEXTd<"vext", "32", v2i32> { +def VEXTd32 : 
VEXTd<"vext", "32", v2i32, imm0_1> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } @@ -4968,17 +5136,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (i32 imm:$index))), (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>; -def VEXTq8 : VEXTq<"vext", "8", v16i8> { +def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> { let Inst{11-8} = index{3-0}; } -def VEXTq16 : VEXTq<"vext", "16", v8i16> { +def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> { let Inst{11-9} = index{2-0}; let Inst{8} = 0b0; } -def VEXTq32 : VEXTq<"vext", "32", v4i32> { +def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } +def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> { + let Inst{11} = index{0}; + let Inst{10-8} = 0b000; +} def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))), @@ -5026,17 +5198,17 @@ def VTBL1 let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>; + (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>; + (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), + (ins VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>; + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 def VTBL2Pseudo @@ -5056,18 +5228,18 @@ def VTBX1 let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>; + (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), + (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX4 - : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", + : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 @@ -5207,11 +5379,83 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; // Assembler aliases // -// VAND/VEOR/VORR accept but do not require a type suffix. +def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; +def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; + + +// VADD two-operand aliases. 
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSUB two-operand aliases. +def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VADDW two-operand aliases. +def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm", + (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm", + (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm", + (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm", + (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm", + (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", + (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; + +// VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", @@ -5220,245 +5464,450 @@ defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; - -// VLD1 requires a size suffix, but also accepts type specific variants. -// Load one D register. -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; - -// Load two D registers. 
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; - -// Load three D registers. -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; - - -// Load four D registers. 
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; - -// VST1 requires a size suffix, but also accepts type specific variants. -// Store one D register. -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; - -// Store two D registers. 
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q8wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q16wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q32wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q64wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; - -// FIXME: The three and four register VST1 instructions haven't been moved -// to the VecList* encoding yet, so we can't do assembly parsing support -// for them. Uncomment these when that happens. -// Load three D registers. -//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; -//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; -//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; -//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; - -// Load four D registers. -//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; - - -// VTRN instructions data type suffix aliases for more-specific types. -defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm", - (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm", - (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm", - (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; - -defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm", - (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm", - (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm", - (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>; +// ... 
two-operand aliases +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VMUL two-operand aliases. +def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm", + (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm", + (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm", + (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm", + (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm", + (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm", + (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm", + (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm", + (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm", + (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm", + (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane", + (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane", + (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane", + (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane", + (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane", + (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane", + (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +// VQADD (register) two-operand aliases. 
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHL (immediate) two-operand aliases. +def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>; + +// VSHL (register) two-operand aliases. 
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHL (immediate) two-operand aliases. +def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +// VLD1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD2LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD2LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST2LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST2LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VMOV takes an optional datatype suffix +defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; + +// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. +// D-register versions. 
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", + (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", + (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", + (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", + (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", + (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", + (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", + (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. +def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", + (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", + (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", + (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", + (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", + (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", + (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", + (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. +// D-register versions. +def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", + (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", + (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", + (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", + (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", + (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", + (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", + (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. 
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", + (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", + (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", + (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", + (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", + (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", + (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", + (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// Two-operand variants for VEXT +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", + (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>; + +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", + (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm", + (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>; + +// Two-operand variants for VQDMULH +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, +// these should restrict to just the Q register variants, but the register +// classes are enough to match correctly regardless, so we keep it simple +// and just use MnemonicAlias. +def : NEONMnemonicAlias<"vbicq", "vbic">; +def : NEONMnemonicAlias<"vandq", "vand">; +def : NEONMnemonicAlias<"veorq", "veor">; +def : NEONMnemonicAlias<"vorrq", "vorr">; + +def : NEONMnemonicAlias<"vmovq", "vmov">; +def : NEONMnemonicAlias<"vmvnq", "vmvn">; +// Explicit versions for floating point so that the FPImm variants get +// handled early. The parser gets confused otherwise. 
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; +def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; + +def : NEONMnemonicAlias<"vaddq", "vadd">; +def : NEONMnemonicAlias<"vsubq", "vsub">; + +def : NEONMnemonicAlias<"vminq", "vmin">; +def : NEONMnemonicAlias<"vmaxq", "vmax">; + +def : NEONMnemonicAlias<"vmulq", "vmul">; + +def : NEONMnemonicAlias<"vabsq", "vabs">; + +def : NEONMnemonicAlias<"vshlq", "vshl">; +def : NEONMnemonicAlias<"vshrq", "vshr">; + +def : NEONMnemonicAlias<"vcvtq", "vcvt">; + +def : NEONMnemonicAlias<"vcleq", "vcle">; +def : NEONMnemonicAlias<"vceqq", "vceq">; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index c6cc98d..ac1a229 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1131,9 +1131,6 @@ def tRSB : // A8.6.141 "rsb", "\t$Rd, $Rn, #0", [(set tGPR:$Rd, (ineg tGPR:$Rn))]>; -def : tInstAlias<"neg${s}${p} $Rd, $Rm", - (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; - // Subtract with carry register let Uses = [CPSR] in def tSBC : // A8.6.151 @@ -1435,3 +1432,8 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>; // nothing). def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; + +// "neg" is and alias for "rsb rd, rn, #0" +def : tInstAlias<"neg${s}${p} $Rd, $Rm", + (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; + diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 6129fa3..981592c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -80,18 +80,19 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ // only used on aliases (Pat<> and InstAlias<>). The actual encoding // is handled by the destination instructions, which use t2_so_imm. def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; } -def t2_so_imm_not : Operand<i32>, - PatLeaf<(imm), [{ +def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1; }], t2_so_imm_not_XFORM> { let ParserMatchClass = t2_so_imm_not_asmoperand; } // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. -def t2_so_imm_neg : Operand<i32>, - PatLeaf<(imm), [{ +def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } +def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; -}], t2_so_imm_neg_XFORM>; +}], t2_so_imm_neg_XFORM> { + let ParserMatchClass = t2_so_imm_neg_asmoperand; +} /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. 
def imm0_4095 : Operand<i32>, @@ -1333,7 +1334,7 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), let mayStore = 1, neverHasSideEffects = 1 in { def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins GPRnopc:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { @@ -1357,13 +1358,13 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), } // mayStore = 1, neverHasSideEffects = 1 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, addr_offset_none:$Rn, + (ins GPRnopc:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStore_iu, "str", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, - (post_store rGPR:$Rt, addr_offset_none:$Rn, + (post_store GPRnopc:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$offset))]>; def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), @@ -3971,6 +3972,18 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; +// STMIA/STMIA_UPD aliases w/o the optional .w suffix +def : t2InstAlias<"stm${p} $Rn, $regs", + (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"stm${p} $Rn!, $regs", + (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + +// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix +def : t2InstAlias<"ldm${p} $Rn, $regs", + (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"ldm${p} $Rn!, $regs", + (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + // STMDB/STMDB_UPD aliases w/ the optional .w suffix def : t2InstAlias<"stmdb${p}.w $Rn, $regs", (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>; @@ -4084,8 +4097,50 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", // for isel. def : t2InstAlias<"mov${p} $Rd, $imm", (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +def : t2InstAlias<"mvn${p} $Rd, $imm", + (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +// Same for AND <--> BIC +def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm", + (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"bic${s}${p} $Rdn, $imm", + (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm", + (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"and${s}${p} $Rdn, $imm", + (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +// Likewise, "add Rd, t2_so_imm_neg" -> sub +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${s}${p} $Rd, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via t2_so_imm_neg +def : t2InstAlias<"cmp${p} $Rd, $imm", + (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; +def : t2InstAlias<"cmn${p} $Rd, $imm", + (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; // Wide 'mul' encoding can be specified with only two operands. 
def : t2InstAlias<"mul${p} $Rn, $Rm", - (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>; + (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>; + +// "neg" is and alias for "rsb rd, rn, #0" +def : t2InstAlias<"neg${s}${p} $Rd, $Rm", + (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>; + +// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for +// these, unfortunately. +def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift", + (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift", + (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; + +// ADR w/o the .w suffix +def : t2InstAlias<"adr${p} $Rd, $addr", + (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e420135..5d43556 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1160,18 +1160,64 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), //===----------------------------------------------------------------------===// // Assembler aliases. // +// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to +// support them all, but supporting at least some of the basics is +// good to be friendly. +def : VFP2MnemonicAlias<"flds", "vldr">; +def : VFP2MnemonicAlias<"fldd", "vldr">; +def : VFP2MnemonicAlias<"fmrs", "vmov">; +def : VFP2MnemonicAlias<"fmsr", "vmov">; +def : VFP2MnemonicAlias<"fsqrts", "vsqrt">; +def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">; +def : VFP2MnemonicAlias<"fadds", "vadd.f32">; +def : VFP2MnemonicAlias<"faddd", "vadd.f64">; +def : VFP2MnemonicAlias<"fmrdd", "vmov">; +def : VFP2MnemonicAlias<"fmrds", "vmov">; +def : VFP2MnemonicAlias<"fmrrd", "vmov">; +def : VFP2MnemonicAlias<"fmdrr", "vmov">; +def : VFP2MnemonicAlias<"fmuld", "vmul.f64">; +def : VFP2MnemonicAlias<"fnegs", "vneg.f32">; +def : VFP2MnemonicAlias<"fnegd", "vneg.f64">; +def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">; +def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">; +def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">; +def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">; +def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">; +def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">; +def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">; +def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">; +def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">; +def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">; +def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">; +def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">; +def : VFP2MnemonicAlias<"fsts", "vstr">; +def : VFP2MnemonicAlias<"fstd", "vstr">; +def : VFP2MnemonicAlias<"fmacd", "vmla.f64">; +def : VFP2MnemonicAlias<"fmacs", "vmla.f32">; def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; +def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", + (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm", + (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm", + (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm", + (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; + +// No need for the size suffix on VSQRT. It's implied by the register classes. +def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; // VLDR/VSTR accept an optional type suffix. 
-defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr", - (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr", - (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr", - (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr", - (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr", + (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr", + (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr", + (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr", + (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; // VMUL has a two-operand form (implied destination operand) def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c8728f4..6712fb6 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -33,6 +33,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -1471,19 +1472,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, while (++I != E) { if (I->isDebugValue() || MemOps.count(&*I)) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects()) + if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) return false; - if (isLd && MCID.mayStore()) + if (isLd && I->mayStore()) return false; if (!isLd) { - if (MCID.mayLoad()) + if (I->mayLoad()) return false; // It's not safe to move the first 'str' down. // str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (MCID.mayStore()) + if (I->mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { @@ -1773,8 +1773,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { while (MBBI != E) { for (; MBBI != E; ++MBBI) { MachineInstr *MI = MBBI; - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.isCall() || MCID.isTerminator()) { + if (MI->isCall() || MI->isTerminator()) { // Stop at barriers. 
++MBBI; break; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 6cbb24b..61b75cb 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -38,22 +38,25 @@ extern "C" void LLVMInitializeARMTarget() { /// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { // Default to soft float ABI - if (FloatABIType == FloatABI::Default) - FloatABIType = FloatABI::Soft; + if (Options.FloatABIType == FloatABI::Default) + this->Options.FloatABIType = FloatABI::Soft; } ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget), + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "v128:32:128-v64:32:64-n32-S32") : @@ -73,9 +76,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), @@ -143,10 +147,16 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) { } bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) { - if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb()) - PM.add(createThumb2SizeReductionPass()); + if (Subtarget.isThumb2()) { + if (!Subtarget.prefers32BitThumb()) + PM.add(createThumb2SizeReductionPass()); + + // The constant island pass works on unbundled instructions.
+ PM.add(createUnpackMachineBundlesPass()); + } PM.add(createARMConstantIslandPass()); + return true; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index a1f517b..cd77822 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -41,6 +41,7 @@ private: public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); @@ -71,6 +72,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); @@ -112,6 +114,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 19defa1..721a225 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -36,6 +36,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); + StructorOutputOrder = Structors::PriorityOrder; LSDASection = NULL; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bb83e5e..cd86065 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -39,10 +39,15 @@ namespace { class ARMOperand; +enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; + class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + // Map of register aliases registered via the .req directive. + StringMap<unsigned> RegisterReqs; + struct { ARMCC::CondCodes Cond; // Condition for IT block. unsigned Mask:4; // Condition mask for instructions.
@@ -90,9 +95,12 @@ class ARMAsmParser : public MCTargetAsmParser { unsigned &ShiftAmount); bool parseDirectiveWord(unsigned Size, SMLoc L); bool parseDirectiveThumb(SMLoc L); + bool parseDirectiveARM(SMLoc L); bool parseDirectiveThumbFunc(SMLoc L); bool parseDirectiveCode(SMLoc L); bool parseDirectiveSyntax(SMLoc L); + bool parseDirectiveReq(StringRef Name, SMLoc L); + bool parseDirectiveUnreq(SMLoc L); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, @@ -161,6 +169,7 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); // Asm Match Converter Methods bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, @@ -271,6 +280,8 @@ class ARMOperand : public MCParsedAsmOperand { k_DPRRegisterList, k_SPRRegisterList, k_VectorList, + k_VectorListAllLanes, + k_VectorListIndexed, k_ShiftedRegister, k_ShiftedImmediate, k_ShifterImmediate, @@ -324,6 +335,8 @@ class ARMOperand : public MCParsedAsmOperand { struct { unsigned RegNum; unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; } VectorList; struct { @@ -409,6 +422,8 @@ public: Registers = o.Registers; break; case k_VectorList: + case k_VectorListAllLanes: + case k_VectorListIndexed: VectorList = o.VectorList; break; case k_CoprocNum: @@ -562,6 +577,22 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 256; } + bool isImm0_1() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 2; + } + bool isImm0_3() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 4; + } bool isImm0_7() const { if (Kind != k_Immediate) return false; @@ -586,6 +617,94 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 32; } + bool isImm0_63() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 64; + } + bool isImm8() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 8; + } + bool isImm16() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 16; + } + bool isImm32() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 32; + } + bool isShrImm8() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 8; + } + bool isShrImm16() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) 
return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 16; + } + bool isShrImm32() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 32; + } + bool isShrImm64() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 64; + } + bool isImm1_7() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 8; + } + bool isImm1_15() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 16; + } + bool isImm1_31() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 32; + } bool isImm1_16() const { if (Kind != k_Immediate) return false; @@ -676,6 +795,14 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(~Value) != -1; } + bool isARMSOImmNeg() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getSOImmVal(-Value) != -1; + } bool isT2SOImm() const { if (Kind != k_Immediate) return false; @@ -692,6 +819,14 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(~Value) != -1; } + bool isT2SOImmNeg() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getT2SOImmVal(-Value) != -1; + } bool isSetEndImm() const { if (Kind != k_Immediate) return false; @@ -892,9 +1027,9 @@ public: if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [-255, -1]. - if (!Memory.OffsetImm) return true; + if (!Memory.OffsetImm) return false; int64_t Val = Memory.OffsetImm->getValue(); - return Val > -256 && Val < 0; + return (Val == INT32_MIN) || (Val > -256 && Val < 0); } bool isMemUImm12Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -940,31 +1075,75 @@ public: bool isProcIFlags() const { return Kind == k_ProcIFlags; } // NEON operands. 
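// Terminology note (assumed from context): a "single-spaced" vector list uses
// consecutive D registers, e.g. {d0, d1, d2}; a "double-spaced" list skips every
// other D register, e.g. {d0, d2, d4}.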
+ bool isSingleSpacedVectorList() const { + return Kind == k_VectorList && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorList() const { + return Kind == k_VectorList && VectorList.isDoubleSpaced; + } bool isVecListOneD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 1; } bool isVecListTwoD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 2; } bool isVecListThreeD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 3; } bool isVecListFourD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 4; } bool isVecListTwoQ() const { - if (Kind != k_VectorList) return false; - //FIXME: We haven't taught the parser to handle by-two register lists - // yet, so don't pretend to know one. - return VectorList.Count == 2 && false; + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 2; + } + + bool isVecListOneDAllLanes() const { + if (Kind != k_VectorListAllLanes) return false; + return VectorList.Count == 1; + } + + bool isVecListTwoDAllLanes() const { + if (Kind != k_VectorListAllLanes) return false; + return VectorList.Count == 2; + } + + bool isVecListOneDByteIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 7; + } + + bool isVecListOneDHWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 3; + } + + bool isVecListOneDWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 1; + } + + bool isVecListTwoDByteIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 7; + } + + bool isVecListTwoDHWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 3; + } + + bool isVecListTwoDWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 1; } bool isVectorIndex8() const { @@ -1233,6 +1412,14 @@ public: Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } + void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a t2_so_imm, but we have its + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a so_imm, but we have its bitwise @@ -1241,6 +1428,14 @@ public: Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } + void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a so_imm, but we have its + // negation in the assembly source, so twiddle it here. 
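// For example (illustrative), "add r0, r1, #-4" can be encoded as the
// equivalent "sub r0, r1, #4" once the immediate is negated here.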
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); @@ -1527,37 +1722,15 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); } - void addVecListOneDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } - - void addVecListTwoDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } - - void addVecListThreeDOperands(MCInst &Inst, unsigned N) const { + void addVecListOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); } - void addVecListFourDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } - - void addVecListTwoQOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. + void addVecListIndexedOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex)); } void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { @@ -1780,10 +1953,32 @@ public: } static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count, - SMLoc S, SMLoc E) { + bool isDoubleSpaced, SMLoc S, SMLoc E) { ARMOperand *Op = new ARMOperand(k_VectorList); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_VectorListAllLanes); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count, + unsigned Index, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_VectorListIndexed); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->VectorList.LaneIndex = Index; Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -1982,6 +2177,14 @@ void ARMOperand::print(raw_ostream &OS) const { OS << "<vector_list " << VectorList.Count << " * " << VectorList.RegNum << ">"; break; + case k_VectorListAllLanes: + OS << "<vector_list(all lanes) " << VectorList.Count << " * " + << VectorList.RegNum << ">"; + break; + case k_VectorListIndexed: + OS << "<vector_list(lane " << VectorList.LaneIndex << ") " + << VectorList.Count << " * " << VectorList.RegNum << ">"; + break; case k_Token: OS << "'" << getToken() << "'"; break; @@ -2000,7 +2203,9 @@ static unsigned 
MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + StartLoc = Parser.getTok().getLoc(); RegNo = tryParseRegister(); + EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } @@ -2013,8 +2218,6 @@ int ARMAsmParser::tryParseRegister() { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return -1; - // FIXME: Validate register for the current architecture; we have to do - // validation later, so maybe there is no need for this here. std::string lowerCase = Tok.getString().lower(); unsigned RegNum = MatchRegisterName(lowerCase); if (!RegNum) { @@ -2023,9 +2226,34 @@ int ARMAsmParser::tryParseRegister() { .Case("r14", ARM::LR) .Case("r15", ARM::PC) .Case("ip", ARM::R12) + // Additional register name aliases for 'gas' compatibility. + .Case("a1", ARM::R0) + .Case("a2", ARM::R1) + .Case("a3", ARM::R2) + .Case("a4", ARM::R3) + .Case("v1", ARM::R4) + .Case("v2", ARM::R5) + .Case("v3", ARM::R6) + .Case("v4", ARM::R7) + .Case("v5", ARM::R8) + .Case("v6", ARM::R9) + .Case("v7", ARM::R10) + .Case("v8", ARM::R11) + .Case("sb", ARM::R9) + .Case("sl", ARM::R10) + .Case("fp", ARM::R11) .Default(0); } - if (!RegNum) return -1; + if (!RegNum) { + // Check for aliases registered via .req. + StringMap<unsigned>::const_iterator Entry = + RegisterReqs.find(Tok.getIdentifier()); + // If no match, return failure. + if (Entry == RegisterReqs.end()) + return -1; + Parser.Lex(); // Eat identifier token. + return Entry->getValue(); + } Parser.Lex(); // Eat identifier token. @@ -2045,6 +2273,7 @@ int ARMAsmParser::tryParseShiftRegister( std::string lowerCase = Tok.getString().lower(); ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase) + .Case("asl", ARM_AM::lsl) .Case("lsl", ARM_AM::lsl) .Case("lsr", ARM_AM::lsr) .Case("asr", ARM_AM::asr) @@ -2073,7 +2302,8 @@ int ARMAsmParser::tryParseShiftRegister( ShiftReg = SrcReg; } else { // Figure out if this is shifted by a constant or a register (for non-RRX). - if (Parser.getTok().is(AsmToken::Hash)) { + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); const MCExpr *ShiftExpr = 0; @@ -2446,6 +2676,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the comma. RegLoc = Parser.getTok().getLoc(); int OldReg = Reg; + const AsmToken RegTok = Parser.getTok(); Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); @@ -2459,8 +2690,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); // List must be monotonically increasing. - if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg)) + if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) return Error(RegLoc, "register list not in ascending order"); + if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) { + Warning(RegLoc, "duplicated register (" + RegTok.getString() + + ") in register list"); + continue; + } // VFP register lists must also be contiguous. // It's OK to use the enumeration values directly here rather, as the // VFP register classes have the enum sorted properly. @@ -2477,13 +2713,55 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return Error(E, "'}' expected"); Parser.Lex(); // Eat '}' token. + // Push the register list operand. 
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E)); + + // The ARM system instruction variants for LDM/STM have a '^' token here. + if (Parser.getTok().is(AsmToken::Caret)) { + Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc())); + Parser.Lex(); // Eat '^' token. + } + return false; } +// Helper function to parse the lane index for vector lists. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { + Index = 0; // Always return a defined index value. + if (Parser.getTok().is(AsmToken::LBrac)) { + Parser.Lex(); // Eat the '['. + if (Parser.getTok().is(AsmToken::RBrac)) { + // "Dn[]" is the 'all lanes' syntax. + LaneKind = AllLanes; + Parser.Lex(); // Eat the ']'. + return MatchOperand_Success; + } + if (Parser.getTok().is(AsmToken::Integer)) { + int64_t Val = Parser.getTok().getIntVal(); + // Make this range check context sensitive for .8, .16, .32. + if (Val < 0 || Val > 7) + Error(Parser.getTok().getLoc(), "lane index out of range"); + Index = Val; + LaneKind = IndexedLane; + Parser.Lex(); // Eat the token. + if (Parser.getTok().isNot(AsmToken::RBrac)) + Error(Parser.getTok().getLoc(), "']' expected"); + Parser.Lex(); // Eat the ']'. + return MatchOperand_Success; + } + Error(Parser.getTok().getLoc(), "lane index must be empty or an integer"); + return MatchOperand_ParseFail; + } + LaneKind = NoLanes; + return MatchOperand_Success; +} + // parse a vector register list ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + VectorLaneTy LaneKind; + unsigned LaneIndex; SMLoc S = Parser.getTok().getLoc(); // As an extension (to match gas), support a plain D register or Q register // (without enclosing curly braces) as a single or double entry list, @@ -2494,12 +2772,48 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_NoMatch; SMLoc E = Parser.getTok().getLoc(); if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { - Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E)); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + if (Res != MatchOperand_Success) + return Res; + switch (LaneKind) { + default: + assert(0 && "unexpected lane kind!"); + case NoLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E)); + break; + case AllLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1, + LaneIndex, S,E)); + break; + } return MatchOperand_Success; } if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + if (Res != MatchOperand_Success) + return Res; + switch (LaneKind) { + default: + assert(0 && "unexpected lane kind!"); + case NoLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); + break; + case AllLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2, + LaneIndex, S,E)); + break; + } return MatchOperand_Success; } Error(S, "vector register expected"); @@ -2518,18 +2832,30 @@
parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } unsigned Count = 1; + int Spacing = 0; unsigned FirstReg = Reg; // The list is of D registers, but we also allow Q regs and just interpret // them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { FirstReg = Reg = getDRegFromQReg(Reg); + Spacing = 1; // double-spacing requires explicit D registers, otherwise + // it's ambiguous with four-register single spaced. ++Reg; ++Count; } + if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; while (Parser.getTok().is(AsmToken::Comma) || Parser.getTok().is(AsmToken::Minus)) { if (Parser.getTok().is(AsmToken::Minus)) { + if (!Spacing) + Spacing = 1; // Register range implies a single spaced list. + else if (Spacing == 2) { + Error(Parser.getTok().getLoc(), + "sequential registers in double spaced list"); + return MatchOperand_ParseFail; + } Parser.Lex(); // Eat the minus. SMLoc EndLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); @@ -2554,6 +2880,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(EndLoc, "bad range in register list"); return MatchOperand_ParseFail; } + // Parse the lane specifier if present. + VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } + EndLoc = Parser.getTok().getLoc(); // Add all the registers in the range to the register list. Count += EndReg - Reg; @@ -2575,6 +2911,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // The list is of D registers, but we also allow Q regs and just interpret // them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + if (!Spacing) + Spacing = 1; // Register range implies a single spaced list. + else if (Spacing == 2) { + Error(RegLoc, + "invalid register in double-spaced list (must be 'D' register')"); + return MatchOperand_ParseFail; + } Reg = getDRegFromQReg(Reg); if (Reg != OldReg + 1) { Error(RegLoc, "non-contiguous register range"); @@ -2582,14 +2925,45 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } ++Reg; Count += 2; + // Parse the lane specifier if present. + VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + SMLoc EndLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } continue; } - // Normal D register. Just check that it's contiguous and keep going. - if (Reg != OldReg + 1) { + // Normal D register. + // Figure out the register spacing (single or double) of the list if + // we don't know it already. + if (!Spacing) + Spacing = 1 + (Reg == OldReg + 2); + + // Just check that it's contiguous and keep going. + if (Reg != OldReg + Spacing) { Error(RegLoc, "non-contiguous register range"); return MatchOperand_ParseFail; } ++Count; + // Parse the lane specifier if present. 
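// Every element of the list must use the same lane syntax, e.g. {d0[1], d1[1]}
// (illustrative example); a mismatch is rejected just below.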
+ VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + SMLoc EndLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } + if (Spacing == 2 && LaneKind != NoLanes) { + Error(EndLoc, + "lane index specifier invalid in double spaced register list"); + return MatchOperand_ParseFail; + } } SMLoc E = Parser.getTok().getLoc(); @@ -2599,7 +2973,22 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } Parser.Lex(); // Eat '}' token. - Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, S, E)); + switch (LaneKind) { + default: + assert(0 && "unexpected lane kind in register list."); + case NoLanes: + Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, + (Spacing == 2), S, E)); + break; + case AllLanes: + Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count, + S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count, + LaneIndex, S, E)); + break; + } return MatchOperand_Success; } @@ -2786,7 +3175,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, Parser.Lex(); // Eat shift type token. // There must be a '#' and a shift amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2864,7 +3254,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the operator. // A '#' and a shift amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2924,7 +3315,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the operator. // A '#' and a rotate amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2961,7 +3353,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); // The bitfield descriptor is really two operands, the LSB and the width. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2993,7 +3386,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat hash token. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -3087,7 +3481,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Tok.getLoc(); // Do immediates first, as we always parse those if we have a '#'.
- if (Parser.getTok().is(AsmToken::Hash)) { + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat the '#'. // Explicitly look for a '-', as we need to encode negative zero // differently. @@ -3444,7 +3839,7 @@ bool ARMAsmParser:: cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); // Vn @@ -3458,7 +3853,7 @@ bool ARMAsmParser:: cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); // Vn @@ -3478,7 +3873,7 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, // Vn ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); // Vt - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; @@ -3494,7 +3889,7 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, // Vm ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); // Vt - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; @@ -3591,8 +3986,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // offset. Be friendly and also accept a plain integer (without a leading // hash) for gas compatibility. if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar) || Parser.getTok().is(AsmToken::Integer)) { - if (Parser.getTok().is(AsmToken::Hash)) + if (Parser.getTok().isNot(AsmToken::Integer)) Parser.Lex(); // Eat the '#'. E = Parser.getTok().getLoc(); @@ -3690,7 +4086,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, if (Tok.isNot(AsmToken::Identifier)) return true; StringRef ShiftName = Tok.getString(); - if (ShiftName == "lsl" || ShiftName == "LSL") + if (ShiftName == "lsl" || ShiftName == "LSL" || + ShiftName == "asl" || ShiftName == "ASL") St = ARM_AM::lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") St = ARM_AM::lsr; @@ -3710,7 +4107,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, Loc = Parser.getTok().getLoc(); // A '#' and a shift amount. const AsmToken &HashTok = Parser.getTok(); - if (HashTok.isNot(AsmToken::Hash)) + if (HashTok.isNot(AsmToken::Hash) && + HashTok.isNot(AsmToken::Dollar)) return Error(HashTok.getLoc(), "'#' expected"); Parser.Lex(); // Eat hash token. @@ -3739,7 +4137,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::Hash)) + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; // Disambiguate the VMOV forms that can accept an FP immediate. 
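// Note: '$' is accepted as an alternative immediate prefix alongside '#',
// apparently for assembler compatibility, so e.g. "mov r0, $42" parses like
// "mov r0, #42" (illustrative example).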
@@ -3852,6 +4251,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return parseMemory(Operands); case AsmToken::LCurly: return parseRegisterList(Operands); + case AsmToken::Dollar: case AsmToken::Hash: { // #42 -> immediate. // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate @@ -3990,7 +4390,9 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" || - Mnemonic == "vrsqrts" || Mnemonic == "srs" || + Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" || + Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || + Mnemonic == "fsts" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4206,9 +4608,27 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } +static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features); /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Apply mnemonic aliases before doing anything else, as the destination + // mnemonic may include suffixes and we want to handle them normally. + // The generic tblgen'erated code does this later, at the start of + // MatchInstructionImpl(), but that's too late for aliases that include + // any sort of suffix. + unsigned AvailableFeatures = getAvailableFeatures(); + applyMnemonicAliases(Name, AvailableFeatures); + + // First check for the ARM-specific .req directive. + if (Parser.getTok().is(AsmToken::Identifier) && + Parser.getTok().getIdentifier() == ".req") { + parseDirectiveReq(Name, NameLoc); + // We always return 'error' for this, as we're done with this + // statement and don't need to match the instruction. + return true; + } + // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); StringRef Mnemonic = Name.slice(Start, Next); @@ -4400,12 +4820,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, } } // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the - // end. Convert it to a token here. + // end. Convert it to a token here. Take care not to convert those + // that should hit the Thumb2 encoding. if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && static_cast<ARMOperand*>(Operands[5])->isImm()) { ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0) { + if (CE && CE->getValue() == 0 && + (isThumbOne() || + // The cc_out operand matches the IT block. + ((inITBlock() != CarrySetting) && + // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && + isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){ Operands.erase(Operands.begin() + 5); Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); delete Op; @@ -4605,11 +5034,495 @@ validateInstruction(MCInst &Inst, return false; } +static unsigned getRealVSTLNOpcode(unsigned Opc) { + switch(Opc) { + default: assert(0 && "unexpected opcode!"); + // VST1LN + case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: + case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: + case ARM::VST1LNdWB_fixed_Asm_U8: + return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16: + case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16: + case ARM::VST1LNdWB_fixed_Asm_U16: + return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: + case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: + case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: + return ARM::VST1LNd32_UPD; + case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: + case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: + case ARM::VST1LNdWB_register_Asm_U8: + return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16: + case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16: + case ARM::VST1LNdWB_register_Asm_U16: + return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: + case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: + case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: + return ARM::VST1LNd32_UPD; + case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: + case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8: + case ARM::VST1LNdAsm_U8: + return ARM::VST1LNd8; + case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16: + case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: + case ARM::VST1LNdAsm_U16: + return ARM::VST1LNd16; + case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: + case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: + case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32: + return ARM::VST1LNd32; + + // VST2LN + case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: + case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: + case ARM::VST2LNdWB_fixed_Asm_U8: + return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16: + case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16: + case ARM::VST2LNdWB_fixed_Asm_U16: + return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: + case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: + case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: + return ARM::VST2LNd32_UPD; + case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: + case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: + case ARM::VST2LNdWB_register_Asm_U8: + return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16: + case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16: + case ARM::VST2LNdWB_register_Asm_U16: + return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: + case ARM::VST2LNdWB_register_Asm_F32: case 
ARM::VST2LNdWB_register_Asm_I32: + case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: + return ARM::VST2LNd32_UPD; + case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: + case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8: + case ARM::VST2LNdAsm_U8: + return ARM::VST2LNd8; + case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16: + case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: + case ARM::VST2LNdAsm_U16: + return ARM::VST2LNd16; + case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: + case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: + case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32: + return ARM::VST2LNd32; + } +} + +static unsigned getRealVLDLNOpcode(unsigned Opc) { + switch(Opc) { + default: assert(0 && "unexpected opcode!"); + // VLD1LN + case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: + case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: + case ARM::VLD1LNdWB_fixed_Asm_U8: + return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16: + case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16: + case ARM::VLD1LNdWB_fixed_Asm_U16: + return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: + case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: + case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: + return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: + case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: + case ARM::VLD1LNdWB_register_Asm_U8: + return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16: + case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16: + case ARM::VLD1LNdWB_register_Asm_U16: + return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: + case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: + case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: + return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: + case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8: + case ARM::VLD1LNdAsm_U8: + return ARM::VLD1LNd8; + case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16: + case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: + case ARM::VLD1LNdAsm_U16: + return ARM::VLD1LNd16; + case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: + case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: + case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32: + return ARM::VLD1LNd32; + + // VLD2LN + case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: + case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: + case ARM::VLD2LNdWB_fixed_Asm_U8: + return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16: + case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16: + case ARM::VLD2LNdWB_fixed_Asm_U16: + return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: + case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: + case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: + return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: + case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: + case ARM::VLD2LNdWB_register_Asm_U8: + return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_register_Asm_16: 
case ARM::VLD2LNdWB_register_Asm_P16: + case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16: + case ARM::VLD2LNdWB_register_Asm_U16: + return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: + case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: + case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: + return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: + case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8: + case ARM::VLD2LNdAsm_U8: + return ARM::VLD2LNd8; + case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16: + case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: + case ARM::VLD2LNdAsm_U16: + return ARM::VLD2LNd16; + case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: + case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: + case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32: + return ARM::VLD2LNd32; + } +} + bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { - // Handle the MOV complex aliases. + // Handle NEON VST complex aliases. + case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: + case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: + case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16: + case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16: + case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16: + case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: + case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: + case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: + case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: + case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16: + case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16: + case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16: + case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: + case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32: + case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: + case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: + case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16: + case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16: + case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16: + case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: + case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: + case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: + case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: + case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16: + case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16: + case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16: + case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: + case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: + case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8: + case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16: + case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: + case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: + case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32: + case ARM::VST1LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8: + case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16: + case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: + case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: + case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32: + case ARM::VST2LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle NEON VLD complex aliases. + case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: + case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: + case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16: + case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16: + case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16: + case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: + case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: + case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: + case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: + case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16: + case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16: + case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16: + case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: + case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: + case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: + case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: + case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16: + case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16: + case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16: + case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: + case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: + case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: + case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: + case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16: + case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16: + case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16: + case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: + case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: + case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8: + case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16: + case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: + case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: + case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32: + case ARM::VLD1LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8: + case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16: + case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: + case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: + case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32: + case ARM::VLD2LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle the Thumb2 mode MOV complex aliases. + case ARM::t2MOVsi: + case ARM::t2MOVSsi: { + // Which instruction to expand to depends on the CCOut operand and + // whether we're in an IT block if the register operands are low + // registers. + bool isNarrow = false; + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg()) && + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi)) + isNarrow = true; + MCInst TmpInst; + unsigned newOpc; + switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) { + default: llvm_unreachable("unexpected opcode!"); + case ARM_AM::asr: newOpc = isNarrow ? 
ARM::tASRri : ARM::t2ASRri; break; + case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break; + case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break; + case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; + } + unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()); + if (Amount == 32) Amount = 0; + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + if (isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateImm(Amount)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + if (!isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); + Inst = TmpInst; + return true; + } + // Handle the ARM mode MOV complex aliases. case ARM::ASRr: case ARM::LSRr: case ARM::LSLr: @@ -4743,6 +5656,24 @@ processInstruction(MCInst &Inst, Inst = TmpInst; } break; + case ARM::t2ADDri12: + // If the immediate fits for encoding T3 (t2ADDri) and the generic "add" + // mnemonic was used (not "addw"), encoding T3 is preferred. + if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" || + ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) + break; + Inst.setOpcode(ARM::t2ADDri); + Inst.addOperand(MCOperand::CreateReg(0)); // cc_out + break; + case ARM::t2SUBri12: + // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub" + // mnemonic was used (not "subw"), encoding T3 is preferred. + if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" || + ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) + break; + Inst.setOpcode(ARM::t2SUBri); + Inst.addOperand(MCOperand::CreateReg(0)); // cc_out + break; case ARM::tADDi8: // If the immediate is in the range 0-7, we want tADDi3 iff Rd was // explicitly specified. From the ARM ARM: "Encoding T1 is preferred @@ -4763,6 +5694,26 @@ processInstruction(MCInst &Inst, return true; } break; + case ARM::t2ADDrr: { + // If the destination and first source operand are the same, and + // there's no setting of the flags, use encoding T2 instead of T3. + // Note that this is only for ADD, not SUB. This mirrors the system + // 'as' behaviour. Make sure the wide encoding wasn't explicit. + if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + Inst.getOperand(5).getReg() != 0 || + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + break; + MCInst TmpInst; + TmpInst.setOpcode(ARM::tADDhirr); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } case ARM::tB: // A Thumb conditional branch outside of an IT block is a tBcc.
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) { @@ -5079,12 +6030,16 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveWord(4, DirectiveID.getLoc()); else if (IDVal == ".thumb") return parseDirectiveThumb(DirectiveID.getLoc()); + else if (IDVal == ".arm") + return parseDirectiveARM(DirectiveID.getLoc()); else if (IDVal == ".thumb_func") return parseDirectiveThumbFunc(DirectiveID.getLoc()); else if (IDVal == ".code") return parseDirectiveCode(DirectiveID.getLoc()); else if (IDVal == ".syntax") return parseDirectiveSyntax(DirectiveID.getLoc()); + else if (IDVal == ".unreq") + return parseDirectiveUnreq(DirectiveID.getLoc()); return true; } @@ -5120,9 +6075,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { return Error(L, "unexpected token in directive"); Parser.Lex(); - // TODO: set thumb mode - // TODO: tell the MC streamer the mode - // getParser().getStreamer().Emit???(); + if (!isThumb()) + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); + return false; +} + +/// parseDirectiveARM +/// ::= .arm +bool ARMAsmParser::parseDirectiveARM(SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + + if (isThumb()) + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); return false; } @@ -5212,6 +6180,45 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { return false; } +/// parseDirectiveReq +/// ::= name .req registername +bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { + Parser.Lex(); // Eat the '.req' token. + unsigned Reg; + SMLoc SRegLoc, ERegLoc; + if (ParseRegister(Reg, SRegLoc, ERegLoc)) { + Parser.EatToEndOfStatement(); + return Error(SRegLoc, "register name expected"); + } + + // Shouldn't be anything else. + if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { + Parser.EatToEndOfStatement(); + return Error(Parser.getTok().getLoc(), + "unexpected input in .req directive."); + } + + Parser.Lex(); // Consume the EndOfStatement + + if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) + return Error(SRegLoc, "redefinition of '" + Name + + "' does not match original."); + + return false; +} + +/// parseDirectiveUnreq +/// ::= .unreq registername +bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Parser.EatToEndOfStatement(); + return Error(L, "unexpected input in .unreq directive."); + } + RegisterReqs.erase(Parser.getTok().getIdentifier()); + Parser.Lex(); // Eat the identifier. + return false; +} + extern "C" void LLVMInitializeARMAsmLexer(); /// Force static initialization.
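The `.req`/`.unreq` support added above amounts to simple name-to-register bookkeeping: `parseDirectiveReq` only rejects rebinding an existing alias to a *different* register, and `parseDirectiveUnreq` erases the entry unconditionally. Below is a minimal standalone sketch of that behaviour, using a plain `std::map` in place of the parser's `RegisterReqs` string map; the class and function names are illustrative only, not LLVM API.

```cpp
#include <cassert>
#include <map>
#include <string>

// Hypothetical illustration of the alias table implied by the patch: ".req"
// keeps the first binding of a name and only reports an error when the name
// is rebound to a different register; ".unreq" drops the binding.
class RegAliasTable {
  std::map<std::string, unsigned> Aliases;

public:
  // "Name .req Reg" -- returns false (an error) if Name is already bound to
  // another register, mirroring the GetOrCreateValue check in the diff.
  bool defineReq(const std::string &Name, unsigned Reg) {
    auto It = Aliases.emplace(Name, Reg).first; // keeps any existing binding
    return It->second == Reg;
  }

  // ".unreq Name" -- erasing an unknown alias is a silent no-op.
  void removeUnreq(const std::string &Name) { Aliases.erase(Name); }
};

int main() {
  RegAliasTable T;
  assert(T.defineReq("foo", 5));  // foo .req r5
  assert(T.defineReq("foo", 5));  // redefining to the same register is fine
  assert(!T.defineReq("foo", 6)); // conflicting redefinition is rejected
  T.removeUnreq("foo");           // .unreq foo
  assert(T.defineReq("foo", 6));  // the name is free to be reused
  return 0;
}
```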
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index 3f5ad39..e24a1b1 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser ) add_dependencies(LLVMARMAsmParser ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMAsmParser - LLVMARMDesc - LLVMARMInfo - LLVMMC - LLVMMCParser - LLVMSupport - ) diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt index cbf9b4b..f0184b6 100644 --- a/lib/Target/ARM/AsmParser/LLVMBuild.txt +++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMAsmParser parent = ARM required_libraries = ARMDesc ARMInfo MC MCParser Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 511932e..04cdf55 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -48,20 +48,6 @@ add_llvm_target(ARMCodeGen Thumb2SizeReduction.cpp ) -add_llvm_library_dependencies(LLVMARMCodeGen - LLVMARMAsmPrinter - LLVMARMDesc - LLVMARMInfo - LLVMAnalysis - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - # workaround for hanging compilation on MSVC9, 10 if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 ) set_property( diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ad250ab..49c64fd 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2085,15 +2085,24 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD1d32Qwb_register: case ARM::VLD1d64Qwb_fixed: case ARM::VLD1d64Qwb_register: - case ARM::VLD2d8_UPD: - case ARM::VLD2d16_UPD: - case ARM::VLD2d32_UPD: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: - case ARM::VLD2b8_UPD: - case ARM::VLD2b16_UPD: - case ARM::VLD2b32_UPD: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: + case ARM::VLD2q8wb_register: + case ARM::VLD2q16wb_register: + case ARM::VLD2q32wb_register: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2b8wb_register: + case ARM::VLD2b16wb_register: + case ARM::VLD2b32wb_register: case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: @@ -2196,23 +2205,40 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST1q16wb_register: case ARM::VST1q32wb_register: case ARM::VST1q64wb_register: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2d8_UPD: - case ARM::VST2d16_UPD: - case ARM::VST2d32_UPD: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: - case ARM::VST2b8_UPD: - case ARM::VST2b16_UPD: - case ARM::VST2b32_UPD: + case ARM::VST1d8Twb_fixed: + case ARM::VST1d16Twb_fixed: + case ARM::VST1d32Twb_fixed: + case ARM::VST1d64Twb_fixed: + case ARM::VST1d8Twb_register: + case ARM::VST1d16Twb_register: + case ARM::VST1d32Twb_register: + case ARM::VST1d64Twb_register: + case ARM::VST1d8Qwb_fixed: + case 
ARM::VST1d16Qwb_fixed: + case ARM::VST1d32Qwb_fixed: + case ARM::VST1d64Qwb_fixed: + case ARM::VST1d8Qwb_register: + case ARM::VST1d16Qwb_register: + case ARM::VST1d32Qwb_register: + case ARM::VST1d64Qwb_register: + case ARM::VST2d8wb_fixed: + case ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2d8wb_register: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_register: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2q8wb_register: + case ARM::VST2q16wb_register: + case ARM::VST2q32wb_register: + case ARM::VST2b8wb_fixed: + case ARM::VST2b16wb_fixed: + case ARM::VST2b32wb_fixed: + case ARM::VST2b8wb_register: + case ARM::VST2b16wb_register: + case ARM::VST2b32wb_register: case ARM::VST3d8_UPD: case ARM::VST3d16_UPD: case ARM::VST3d32_UPD: @@ -2264,34 +2290,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Second input register switch (Inst.getOpcode()) { - case ARM::VST1d8T: - case ARM::VST1d16T: - case ARM::VST1d32T: - case ARM::VST1d64T: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2d8: - case ARM::VST2d16: - case ARM::VST2d32: - case ARM::VST2d8_UPD: - case ARM::VST2d16_UPD: - case ARM::VST2d32_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: @@ -2307,12 +2305,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) return MCDisassembler::Fail; break; - case ARM::VST2b8: - case ARM::VST2b16: - case ARM::VST2b32: - case ARM::VST2b8_UPD: - case ARM::VST2b16_UPD: - case ARM::VST2b32_UPD: case ARM::VST3q8: case ARM::VST3q16: case ARM::VST3q32: @@ -2334,28 +2326,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Third input register switch (Inst.getOpcode()) { - case ARM::VST1d8T: - case ARM::VST1d16T: - case ARM::VST1d32T: - case ARM::VST1d64T: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: @@ -2392,20 +2362,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Fourth input register switch (Inst.getOpcode()) { - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST4d8: case ARM::VST4d16: case ARM::VST4d32: @@ -2441,16 +2397,11 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); 
unsigned size = fieldFromInstruction32(Insn, 6, 2); - unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1; align *= (1 << size); if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; - if (regs == 2) { - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) - return MCDisassembler::Fail; - } if (Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -2460,12 +2411,12 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) - return MCDisassembler::Fail; - } + // The fixed offset post-increment encodes Rm == 0xd. The no-writeback + // variant encodes Rm == 0xf. Anything else is a register offset post- + // increment and we need to add the register operand to the instruction. + if (Rm != 0xD && Rm != 0xF && + !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; return S; } @@ -2693,7 +2644,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; unsigned op = fieldFromInstruction32(Insn, 6, 1); - unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1; if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; @@ -2702,10 +2652,8 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // Writeback } - for (unsigned i = 0; i < length; ++i) { - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder))) + if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; - } if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; @@ -4138,4 +4086,3 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, return S; } - diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt index da87751..9de6e5c 100644 --- a/lib/Target/ARM/Disassembler/CMakeLists.txt +++ b/lib/Target/ARM/Disassembler/CMakeLists.txt @@ -11,11 +11,3 @@ set_property( ) endif() add_dependencies(LLVMARMDisassembler ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMDisassembler - LLVMARMCodeGen - LLVMARMDesc - LLVMARMInfo - LLVMMC - LLVMSupport - ) diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index baa9bc3..94075a9 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMDisassembler parent = ARM required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 6c6c021..662097a 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1029,3 +1029,29 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}"; } + +void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream 
&O) { + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}"; +} + +void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}"; +} + +void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; +} + diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 3f38f1a..05db2d2 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -133,6 +133,12 @@ public: void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); }; } // end namespace llvm diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt index fa0b495..e2d4819 100644 --- a/lib/Target/ARM/InstPrinter/CMakeLists.txt +++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt @@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter ) add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMAsmPrinter - LLVMMC - LLVMSupport - ) diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt index b34aab4..6f4fa36 100644 --- a/lib/Target/ARM/InstPrinter/LLVMBuild.txt +++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMAsmPrinter parent = ARM required_libraries = MC Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt index 9082539..fd4b3a3 100644 --- a/lib/Target/ARM/LLVMBuild.txt +++ b/lib/Target/ARM/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = ARM @@ -30,4 +33,3 @@ name = ARMCodeGen parent = ARM required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target add_to_library_groups = ARM - diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 62d04c4..bf1f0e8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -102,6 +102,11 @@ public: bool MayNeedRelaxation(const MCInst &Inst) const; + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) 
const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; @@ -124,14 +129,49 @@ public: }; } // end anonymous namespace +static unsigned getRelaxedOpcode(unsigned Op) { + switch (Op) { + default: return Op; + case ARM::tBcc: return ARM::t2Bcc; + } +} + bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { - // FIXME: Thumb targets, different move constant targets.. + if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()) + return true; return false; } +bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the + // low bit being an implied zero. There's an implied +4 offset for the + // branch, so we adjust the other way here to determine what's + // encodable. + // + // Relax if the value is too big for a (signed) i8. + int64_t Offset = int64_t(Value) - 4; + return Offset > 254 || Offset < -256; +} + void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { - assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented"); - return; + unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); + + // Sanity check w/ diagnostic if we get here w/ a bogus instruction. + if (RelaxedOp == Inst.getOpcode()) { + SmallString<256> Tmp; + raw_svector_ostream OS(Tmp); + Inst.dump_pretty(OS); + OS << "\n"; + report_fatal_error("unexpected instruction to relax: " + OS.str()); + } + + // The instructions we're relaxing have (so far) the same operands. + // We just need to update to the proper opcode. + Res = Inst; + Res.setOpcode(RelaxedOp); } bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 865c3e2..c38a882 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1412,7 +1412,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return MO.getReg(); + return getARMRegisterNumbering(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 352c73e..f394b4f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" @@ -178,9 +179,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: MovtBit = 1; + // The thumb bit shouldn't be set in the 'other-half' bit of the + // relocation, but it will be set in FixedValue if the base symbol + // is a thumb function. Clear it out here. 
+ if (A_SD->getFlags() & SF_ThumbFunc) + FixedValue &= 0xfffffffe; break; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: + if (A_SD->getFlags() & SF_ThumbFunc) + FixedValue &= 0xfffffffe; MovtBit = 1; // Fallthrough case ARM::fixup_t2_movw_lo16: @@ -189,7 +197,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, break; } - if (Type == macho::RIT_ARM_HalfDifference) { uint32_t OtherHalf = MovtBit ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index f529314..f2cf78a 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -10,10 +10,3 @@ add_dependencies(LLVMARMDesc ARMCommonTableGen) # Hack: we need to include 'main' target directory to grab private headers include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) - -add_llvm_library_dependencies(LLVMARMDesc - LLVMARMAsmPrinter - LLVMARMInfo - LLVMMC - LLVMSupport - ) diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt index 46b11c7..2a7fe61 100644 --- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMDesc parent = ARM required_libraries = ARMAsmPrinter ARMInfo MC Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 2df0053..000a37f 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { // FIXME: Detect integer instructions properly. const MCInstrDesc &MCID = MI->getDesc(); unsigned Domain = MCID.TSFlags & ARMII::DomainMask; - if (MCID.mayStore()) + if (MI->mayStore()) return false; unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) @@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, const MCInstrDesc &MCID2 = TII->get(AddSubOpc); unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI)); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg) + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) .addReg(Src2Reg, getKillRegState(Src2Kill)); if (HasLane) MIB.addImm(LaneImm); MIB.addImm(Pred).addReg(PredReg); - MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2) + MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); if (NegAcc) { @@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.isBarrier()) { + if (MI->isBarrier()) { clearStack(); Skip = 0; ++MII; diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt index 8b38b13..533e747 100644 --- a/lib/Target/ARM/TargetInfo/CMakeLists.txt +++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt @@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo ) add_dependencies(LLVMARMInfo ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMInfo - LLVMMC - LLVMSupport - LLVMTarget - ) diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt index 046c1fc..a07a940 100644 --- a/lib/Target/ARM/TargetInfo/LLVMBuild.txt +++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt @@ -21,4 
+21,3 @@ name = ARMInfo parent = ARM required_libraries = MC Support Target add_to_library_groups = ARM - diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index e8ed482..e61c0a7 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -643,14 +643,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(Offset && "This code isn't needed if offset already handled!"); unsigned Opcode = MI.getOpcode(); - const MCInstrDesc &Desc = MI.getDesc(); // Remove predicate first. int PIdx = MI.findFirstPredOperandIdx(); if (PIdx != -1) removeOperands(MI, PIdx); - if (Desc.mayLoad()) { + if (MI.mayLoad()) { // Use the destination register to materialize sp + offset. unsigned TmpReg = MI.getOperand(0).getReg(); bool UseRR = false; @@ -673,7 +672,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); - } else if (Desc.mayStore()) { + } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); bool UseRR = false; @@ -699,7 +698,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Add predicate back if it's needed. - if (MI.getDesc().isPredicable()) { + if (MI.isPredicable()) { MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index b627400..55b4d30 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -13,6 +13,7 @@ #include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, // rsb r2, 0 // const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.hasOptionalDef() && + if (MI->hasOptionalDef() && MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR) return false; @@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. for (; MBBI != E && Pos && - (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) { + (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) { if (MBBI->isDebugValue()) continue; @@ -237,6 +238,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { // Last instruction in IT block kills ITSTATE. LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); + // Finalize the bundle. + FinalizeBundle(MBB, InsertPos.getInstrIterator(), LastITMI); + Modified = true; ++NumITs; } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index e5fc8b4..e206288 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -452,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit load / store instruction. 
DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)); MIB.addOperand(MI->getOperand(1)); @@ -478,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumLdSts; return true; } @@ -513,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) return false; - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(ARM::tADDrSPi)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) @@ -525,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumNarrows; return true; } @@ -533,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayLoad() || MCID.mayStore()) + if (MI->mayLoad() || MI->mayStore()) return ReduceLoadStore(MBB, MI, Entry); switch (Opc) { @@ -654,7 +653,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) @@ -678,7 +677,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++Num2Addrs; return true; } @@ -745,7 +744,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) @@ -785,7 +784,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumNarrows; return true; } @@ -830,16 +829,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Yes, CPSR could be livein. bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineInstr *CPSRDef = 0; + MachineInstr *BundleMI = 0; // If this BB loops back to itself, conservatively avoid narrowing the // first instruction that does partial flag update. 
bool IsSelfLoop = MBB.isSuccessor(&MBB); - MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); - MachineBasicBlock::iterator NextMII; + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end(); + MachineBasicBlock::instr_iterator NextMII; for (; MII != E; MII = NextMII) { NextMII = llvm::next(MII); MachineInstr *MI = &*MII; + if (MI->isBundle()) { + BundleMI = MI; + continue; + } + LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); unsigned Opcode = MI->getOpcode(); @@ -850,7 +855,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { if (Entry.Special) { if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } goto ProcessNext; @@ -860,7 +865,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; goto ProcessNext; } @@ -869,15 +874,24 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } } ProcessNext: + if (LiveCPSR && + NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle() && + BundleMI->killsRegister(ARM::CPSR)) + // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill + // marker is only on the BUNDLE instruction. Process the BUNDLE + // instruction as we finish with the bundled instruction to work around + // the inconsistency. + LiveCPSR = false; + bool DefCPSR = false; LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); - if (MI->getDesc().isCall()) { + if (MI->isCall()) { // Calls don't really set CPSR. 
CPSRDef = 0; IsSelfLoop = false; diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt index edf8ee7..fa819a4 100644 --- a/lib/Target/CBackend/CMakeLists.txt +++ b/lib/Target/CBackend/CMakeLists.txt @@ -2,16 +2,4 @@ add_llvm_target(CBackendCodeGen CBackend.cpp ) -add_llvm_library_dependencies(LLVMCBackendCodeGen - LLVMAnalysis - LLVMCBackendInfo - LLVMCodeGen - LLVMCore - LLVMMC - LLVMScalarOpts - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) - add_subdirectory(TargetInfo) diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index ca346af..8b2286e 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -21,10 +21,10 @@ namespace llvm { struct CTargetMachine : public TargetMachine { CTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : TargetMachine(T, TT, CPU, FS) {} + : TargetMachine(T, TT, CPU, FS, Options) { } virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/CBackend/LLVMBuild.txt index 851ded9..e64feb0 100644 --- a/lib/Target/CBackend/LLVMBuild.txt +++ b/lib/Target/CBackend/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = TargetInfo + [component_0] type = TargetGroup name = CBackend @@ -26,4 +29,3 @@ name = CBackendCodeGen parent = CBackend required_libraries = Analysis CBackendInfo CodeGen Core MC Scalar Support Target TransformUtils add_to_library_groups = CBackend - diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt index 8e616be..6203616 100644 --- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt +++ b/lib/Target/CBackend/TargetInfo/CMakeLists.txt @@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMCBackendInfo CBackendTargetInfo.cpp ) - -add_llvm_library_dependencies(LLVMCBackendInfo - LLVMMC - LLVMSupport - LLVMTarget - ) diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt index 35752b7..1b47d8e 100644 --- a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = CBackendInfo parent = CBackend required_libraries = MC Support Target add_to_library_groups = CBackend - diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 60e2189..22d8c76 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -3,7 +3,6 @@ add_llvm_library(LLVMTarget Target.cpp TargetData.cpp TargetELFWriterInfo.cpp - TargetFrameLowering.cpp TargetInstrInfo.cpp TargetIntrinsicInfo.cpp TargetLibraryInfo.cpp @@ -13,12 +12,6 @@ add_llvm_library(LLVMTarget TargetSubtargetInfo.cpp ) -add_llvm_library_dependencies(LLVMTarget - LLVMCore - LLVMMC - LLVMSupport - ) - foreach(t ${LLVM_TARGETS_TO_BUILD}) message(STATUS "Targeting ${t}") add_subdirectory(${t}) diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index b442a5c..6c67c2d 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -23,17 +23,5 @@ add_llvm_target(CellSPUCodeGen SPUNopFiller.cpp ) -add_llvm_library_dependencies(LLVMCellSPUCodeGen - LLVMAsmPrinter - LLVMCellSPUDesc - LLVMCellSPUInfo - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt index 4ae26b2..277620b 100644 --- a/lib/Target/CellSPU/LLVMBuild.txt +++ b/lib/Target/CellSPU/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = CellSPU @@ -27,4 +30,3 @@ name = CellSPUCodeGen parent = CellSPU required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target add_to_library_groups = CellSPU - diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt index d41fe93..0027bdb 100644 --- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt @@ -3,9 +3,4 @@ add_llvm_library(LLVMCellSPUDesc SPUMCAsmInfo.cpp ) -add_llvm_library_dependencies(LLVMCellSPUDesc - LLVMCellSPUInfo - LLVMMC - ) - add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen) diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt index abc44a2..71e5bbc 100644 --- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = CellSPUDesc parent = CellSPU required_libraries = CellSPUInfo MC add_to_library_groups = CellSPU - diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp index 093f99f..916f9ba 100644 --- a/lib/Target/CellSPU/SPUFrameLowering.cpp +++ b/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -47,7 +47,8 @@ bool SPUFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->getStackSize() && - (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()); + (MF.getTarget().Options.DisableFramePointerElim(MF) || + 
MFI->hasVarSizedObjects()); } diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index d58e49b..dc0d5a6 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -296,12 +296,22 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i64, Expand); setOperationAction(ISD::CTTZ , MVT::i128, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand); setOperationAction(ISD::CTLZ , MVT::i8, Promote); setOperationAction(ISD::CTLZ , MVT::i16, Promote); setOperationAction(ISD::CTLZ , MVT::i32, Legal); setOperationAction(ISD::CTLZ , MVT::i64, Expand); setOperationAction(ISD::CTLZ , MVT::i128, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand); // SPU has a version of select that implements (a&~c)|(b&c), just like // select ought to work: diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 6940316..1e922a4 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -34,9 +34,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const { SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 909f12e..0841fee 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -39,7 +39,7 @@ class SPUTargetMachine : public LLVMTargetMachine { InstrItineraryData InstrItins; public: SPUTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt index 3f2d6b09..6a98f95 100644 --- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt +++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMCellSPUInfo CellSPUTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMCellSPUInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen) diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt index 0710cc3..6937e70 100644 --- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = CellSPUInfo parent = CellSPU required_libraries = MC Support Target add_to_library_groups = CellSPU - diff --git a/lib/Target/CppBackend/CMakeLists.txt 
b/lib/Target/CppBackend/CMakeLists.txt index 53f6868..515e1dd 100644 --- a/lib/Target/CppBackend/CMakeLists.txt +++ b/lib/Target/CppBackend/CMakeLists.txt @@ -2,11 +2,4 @@ add_llvm_target(CppBackendCodeGen CPPBackend.cpp ) -add_llvm_library_dependencies(LLVMCppBackendCodeGen - LLVMCore - LLVMCppBackendInfo - LLVMSupport - LLVMTarget - ) - add_subdirectory(TargetInfo) diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index a3613b4..92bca6c 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -23,10 +23,10 @@ class formatted_raw_ostream; struct CPPTargetMachine : public TargetMachine { CPPTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : TargetMachine(T, TT, CPU, FS) {} + : TargetMachine(T, TT, CPU, FS, Options) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt index 77e31c7..122b5e7 100644 --- a/lib/Target/CppBackend/LLVMBuild.txt +++ b/lib/Target/CppBackend/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = TargetInfo + [component_0] type = TargetGroup name = CppBackend @@ -26,4 +29,3 @@ name = CppBackendCodeGen parent = CppBackend required_libraries = Core CppBackendInfo Support Target add_to_library_groups = CppBackend - diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt index 738b215..f82d72e 100644 --- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt +++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt @@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMCppBackendInfo CppBackendTargetInfo.cpp ) - -add_llvm_library_dependencies(LLVMCppBackendInfo - LLVMMC - LLVMSupport - LLVMTarget - ) diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt index 67a23ba..d4dfc3e 100644 --- a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = CppBackendInfo parent = CppBackend required_libraries = MC Support Target add_to_library_groups = CppBackend - diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt new file mode 100644 index 0000000..f8705ee --- /dev/null +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -0,0 +1,35 @@ +set(LLVM_TARGET_DEFINITIONS Hexagon.td) + +tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv) +tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM HexagonGenIntrinsics.inc -gen-tgt-intrinsic) +add_public_tablegen_target(HexagonCommonTableGen) + +add_llvm_target(HexagonCodeGen + HexagonAsmPrinter.cpp + HexagonCallingConvLower.cpp + HexagonCFGOptimizer.cpp + HexagonExpandPredSpillCode.cpp + HexagonFrameLowering.cpp + HexagonHardwareLoops.cpp + HexagonInstrInfo.cpp + HexagonISelDAGToDAG.cpp + HexagonISelLowering.cpp + HexagonMCAsmInfo.cpp + HexagonOptimizeSZExtends.cpp + HexagonRegisterInfo.cpp + HexagonRemoveSZExtArgs.cpp + HexagonSelectionDAGInfo.cpp + HexagonSplitTFRCondSets.cpp + HexagonSubtarget.cpp + HexagonTargetMachine.cpp + HexagonTargetObjectFile.cpp + ) + +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) + diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h new file mode 100644 index 0000000..a5f2279 --- /dev/null +++ b/lib/Target/Hexagon/Hexagon.h @@ -0,0 +1,54 @@ +//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Hexagon back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_Hexagon_H +#define TARGET_Hexagon_H + +#include <cassert> +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + class FunctionPass; + class TargetMachine; + class HexagonTargetMachine; + class raw_ostream; + + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM); + FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM); + FunctionPass *createHexagonFPMoverPass(TargetMachine &TM); + FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM); + FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM); + + FunctionPass* createHexagonSplitTFRCondSets(HexagonTargetMachine &TM); + FunctionPass* createHexagonExpandPredSpillCode(HexagonTargetMachine &TM); + + FunctionPass *createHexagonHardwareLoops(); + FunctionPass *createHexagonOptimizeSZExtends(); + FunctionPass *createHexagonFixupHwLoops(); + +} // end namespace llvm; + +#define Hexagon_POINTER_SIZE 4 + +#define Hexagon_PointerSize (Hexagon_POINTER_SIZE) +#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8) +#define Hexagon_WordSize Hexagon_PointerSize +#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits + +// allocframe saves LR and FP on stack before allocating +// a new stack frame. This takes 8 bytes. +#define HEXAGON_LRFP_SIZE 8 + +#endif diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td new file mode 100644 index 0000000..72939e6 --- /dev/null +++ b/lib/Target/Hexagon/Hexagon.td @@ -0,0 +1,66 @@ +//===- Hexagon.td - Describe the Hexagon Target Machine ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Hexagon Subtarget features. +// + + +// Hexagon Archtectures +def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2", + "Hexagon v2">; +def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3", + "Hexagon v3">; +def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4", + "Hexagon v4">; + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// +include "HexagonSchedule.td" +include "HexagonRegisterInfo.td" +include "HexagonCallingConv.td" +include "HexagonInstrInfo.td" +include "HexagonIntrinsics.td" +include "HexagonIntrinsicsDerived.td" + + +def HexagonInstrInfo : InstrInfo { + // Define how we want to layout our target-specific information field. +} + +//===----------------------------------------------------------------------===// +// Hexagon processors supported. 
+//===----------------------------------------------------------------------===// + +class Proc<string Name, ProcessorItineraries Itin, + list<SubtargetFeature> Features> + : Processor<Name, Itin, Features>; + +def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>; +def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>; +def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>; + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def Hexagon : Target { + // Pull in Instruction Info: + let InstructionSet = HexagonInstrInfo; +} diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp new file mode 100644 index 0000000..8f8e804 --- /dev/null +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -0,0 +1,555 @@ +//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly ----=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to Hexagon assembly language. This printer is +// the output mechanism used by `llc'. +// +// Documentation at http://developer.apple.com/documentation/DeveloperTools/ +// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html +// +//===----------------------------------------------------------------------===// + + +#define DEBUG_TYPE "asm-printer" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> AlignCalls( + "hexagon-align-calls", cl::Hidden, cl::init(true), + cl::desc("Insert falign after call instruction for Hexagon target")); + + +namespace { + class HexagonAsmPrinter : public AsmPrinter { + const HexagonSubtarget *Subtarget; + + public: + explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<HexagonSubtarget>(); + } + + virtual const char *getPassName() const { + return "Hexagon Assembly Printer"; + } + + /// printInstruction - This method is 
automatically generated by tablegen + /// from the instruction set description. This method returns true if the + /// machine instruction was sufficiently described to print it, otherwise it + void printInstruction(const MachineInstr *MI, raw_ostream &O); + virtual void EmitInstruction(const MachineInstr *MI); + + void printOp(const MachineOperand &MO, raw_ostream &O); + + /// printRegister - Print register according to target requirements. + /// + void printRegister(const MachineOperand &MO, bool R0AsZero, + raw_ostream &O) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); + O << getRegisterName(RegNo); + } + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + printRegister(MO, false, OS); + } else if (MO.isImm()) { + OS << MO.getImm(); + } else { + printOp(MO, OS); + } + } + + + bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + + + void printHexagonImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << value; + } + + + void printHexagonNegImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << -value; + } + + void printHexagonMEMriOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << " + #" + << (int) MO2.getImm(); + } + + + void printHexagonFrameIndexOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << ", #" + << MO2.getImm(); + } + + void printBranchOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. 
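+ // For example, an immediate operand of 2 is printed as "$+8" (2 words of
+ // 4 bytes each); non-immediate operands fall through to printOp().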
+ if (MI->getOperand(OpNo).isImm()) { + O << "$+" << MI->getOperand(OpNo).getImm()*4; + } else { + printOp(MI->getOperand(OpNo), O); + } + } + + void printCallOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + } + + void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + } + + + void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + O << "#HI("; + if (MI->getOperand(OpNo).isImm()) { + printHexagonImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + } + O << ")"; + } + + void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + O << "#HI("; + if (MI->getOperand(OpNo).isImm()) { + printHexagonImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + } + O << ")"; + } + + void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O); + + void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo, + raw_ostream &O); + + void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O); + void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O); + + void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const; + + static const char *getRegisterName(unsigned RegNo); + }; + +} // end of anonymous namespace + +// Include the auto-generated portion of the assembly writer. +#include "HexagonGenAsmWriter.inc" + + +void HexagonAsmPrinter::EmitAlignment(unsigned NumBits, + const GlobalValue *GV) const { + + // For basic block level alignment, use falign. + if (!GV) { + OutStreamer.EmitRawText(StringRef("\t.falign")); + return; + } + + AsmPrinter::EmitAlignment(NumBits, GV); +} + +void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + dbgs() << "printOp() does not handle immediate values\n"; + abort(); + return; + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_JumpTableIndex: + O << *GetJTISymbol(MO.getIndex()); + // FIXME: PIC relocation model. + return; + case MachineOperand::MO_ConstantPoolIndex: + O << *GetCPISymbol(MO.getIndex()); + return; + case MachineOperand::MO_ExternalSymbol: + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + return; + case MachineOperand::MO_GlobalAddress: { + // Computing the address of a global symbol, not calling it. + O << *Mang->getSymbol(MO.getGlobal()); + printOffset(MO.getOffset(), O); + return; + } + + default: + O << "<unknown operand type: " << MO.getType() << ">"; + return; + } +} + + +// +// isBlockOnlyReachableByFallthrough - We need to override this since the +// default AsmPrinter does not print labels for any basic block that +// is only reachable by a fall through. That works for all cases except +// for the case in which the basic block is reachable by a fall through but +// through an indirect from a jump table. In this case, the jump table +// will contain a label not defined by AsmPrinter. +// +bool HexagonAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + if (MBB->hasAddressTaken()) { + return false; + } + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + // Does this asm operand have a single letter operand modifier? 
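+ // The modifiers handled below are 'c' (print with no prefix), 'L' (the
+ // second register of a 64-bit register pair) and 'I' (print "i" when the
+ // operand is an immediate); any other modifier is reported as unknown.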
+ if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'c': // Don't print "$" before a global var name or constant. + // Hexagon never has a prefix. + printOperand(MI, OpNo, OS); + return false; + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + OS << "i"; + return false; + } + } + + printOperand(MI, OpNo, OS); + return false; +} + +bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + const MachineOperand &Base = MI->getOperand(OpNo); + const MachineOperand &Offset = MI->getOperand(OpNo+1); + + if (Base.isReg()) + printOperand(MI, OpNo, O); + else + assert(0 && "Unimplemented"); + + if (Offset.isImm()) { + if (Offset.getImm()) + O << " + #" << Offset.getImm(); + } + else + assert(0 && "Unimplemented"); + + return false; +} + +void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI, + unsigned OpNo, + raw_ostream &O) { + assert(0 && "Unimplemented"); +} + + +/// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to +/// the current output stream. +/// +void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream O(Str); + + const MachineFunction* MF = MI->getParent()->getParent(); + const HexagonMachineFunctionInfo* MFI = + (const HexagonMachineFunctionInfo*) + MF->getInfo<HexagonMachineFunctionInfo>(); + + + + // Print a brace for the beginning of the packet. + if (MFI->isStartPacket(MI)) { + O << "\t{" << '\n'; + } + + DEBUG( O << "// MI = " << *MI << '\n';); + + // Indent + O << "\t"; + + + if (MI->getOpcode() == Hexagon::ENDLOOP0) { + if (MFI->isEndPacket(MI) && MFI->isStartPacket(MI)) { + O << "\t{ nop }"; + } else { + O << "}"; + } + printInstruction(MI, O); + } else if (MI->getOpcode() == Hexagon::STriwt) { + // + // Handle truncated store on Hexagon. 
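+ // The 64-bit source is narrowed by printing only its low 32-bit
+ // subregister, e.g. "memw(r29 + #0) = r2" when the low half of the source
+ // pair is r2 (the register names here are only illustrative).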
+ // + O << "\tmemw("; + printHexagonMEMriOperand(MI, 0, O); + + O << ") = "; + unsigned SubRegNum = + TM.getRegisterInfo()->getSubReg(MI->getOperand(2) + .getReg(), Hexagon::subreg_loreg); + const char *SubRegName = getRegisterName(SubRegNum); + O << SubRegName << '\n'; + } else if (MI->getOpcode() == Hexagon::MPYI_rin) { + // Handle multipy with -ve constant on Hexagon: + // "$dst =- mpyi($src1, #$src2)" + printOperand(MI, 0, O); + O << " =- mpyi("; + printOperand(MI, 1, O); + O << ", #"; + printHexagonNegImmOperand(MI, 2, O); + O << ")"; + } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_indexed_MEM_V4) { + // + // Handle memw(Rs+u6:2) [+-]= #U5 + // + O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_MEM_V4) { + // + // Handle memw(Rs+u6:2) [+-]= #U5 + // + O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_indexed_MEM_V4) { + // + // Handle memh(Rs+u6:1) [+-]= #U5 + // + O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_MEM_V4) { + // + // Handle memh(Rs+u6:1) [+-]= #U5 + // + O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_indexed_MEM_V4) { + // + // Handle memb(Rs+u6:1) [+-]= #U5 + // + O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_MEM_V4) { + // + // Handle memb(Rs+u6:1) [+-]= #U5 + // + O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::CMPbGTri_V4) { + // + // Handle Pd=cmpb.gt(Rs,#s8) + // + O << "\t"; + printRegister(MI->getOperand(0), false, O); + O << " = cmpb.gt("; + printRegister(MI->getOperand(1), false, O); + O << ", "; + int val = MI->getOperand(2).getImm() >> 24; + O << "#" << val << ")" << '\n'; + } else if (MI->getOpcode() == Hexagon::CMPhEQri_V4) { + // + // Handle Pd=cmph.eq(Rs,#8) + // + O << "\t"; + printRegister(MI->getOperand(0), false, O); + O << " = cmph.eq("; + printRegister(MI->getOperand(1), false, O); + O << ", "; + int val = MI->getOperand(2).getImm(); + assert((((0 <= val) && (val <= 127)) || + ((65408 <= val) && (val <= 65535))) && + "Not in correct range!"); + if (val >= 65408) val -= 65536; + O << "#" << val << ")" << '\n'; + } else if (MI->getOpcode() == Hexagon::CMPhGTri_V4) { + // + // Handle Pd=cmph.gt(Rs,#8) + // + O << "\t"; + printRegister(MI->getOperand(0), false, O); + O << " = cmph.gt("; + printRegister(MI->getOperand(1), false, O); + O << ", "; + int val = MI->getOperand(2).getImm() >> 
16; + O << "#" << val << ")" << '\n'; + } else { + printInstruction(MI, O); + } + + // Print a brace for the end of the packet. + if (MFI->isEndPacket(MI) && MI->getOpcode() != Hexagon::ENDLOOP0) { + O << "\n\t}" << '\n'; + } + + if (AlignCalls && MI->getDesc().isCall()) { + O << "\n\t.falign" << "\n"; + } + + OutStreamer.EmitRawText(O.str()); + return; +} + +/// PrintUnmangledNameSafely - Print out the printable characters in the name. +/// Don't print things like \n or \0. +// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { +// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); +// Name != E; ++Name) +// if (isprint(*Name)) +// OS << *Name; +// } + + +void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI, + int OpNo, raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << " + #" + << MO2.getImm(); +} + + +void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + assert( (MO.getType() == MachineOperand::MO_GlobalAddress) && + "Expecting global address"); + + O << *Mang->getSymbol(MO.getGlobal()); + if (MO.getOffset() != 0) { + O << " + "; + O << MO.getOffset(); + } +} + +void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) && + "Expecting jump table index"); + + // Hexagon_TODO: Do we need name mangling? + O << *GetJTISymbol(MO.getIndex()); +} + +extern "C" void LLVMInitializeHexagonAsmPrinter() { + RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); +} diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp new file mode 100644 index 0000000..38000e7 --- /dev/null +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -0,0 +1,240 @@ +//===---- HexagonCFGOptimizer.cpp - CFG optimizations ---------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
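+//
+// This pass tidies the CFG by inverting a conditional jump that merely hops
+// over an unconditional jump, retargeting it directly and emptying the
+// jump-only block (see the two cases documented in runOnMachineFunction).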
+// +//===----------------------------------------------------------------------===// + + +#define DEBUG_TYPE "hexagon_cfg" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" +#include <iostream> + +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +namespace { + +class HexagonCFGOptimizer : public MachineFunctionPass { + +private: + HexagonTargetMachine& QTM; + const HexagonSubtarget &QST; + + void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); + + public: + static char ID; + HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID), + QTM(TM), + QST(*TM.getSubtargetImpl()) {} + + const char *getPassName() const { + return "Hexagon CFG Optimizer"; + } + bool runOnMachineFunction(MachineFunction &Fn); +}; + + +char HexagonCFGOptimizer::ID = 0; + +static bool IsConditionalBranch(int Opc) { + return (Opc == Hexagon::JMP_Pred) || (Opc == Hexagon::JMP_PredNot) + || (Opc == Hexagon::JMP_PredPt) || (Opc == Hexagon::JMP_PredNotPt); +} + + +static bool IsUnconditionalJump(int Opc) { + return (Opc == Hexagon::JMP); +} + + +void +HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, + MachineBasicBlock* NewTarget) { + const HexagonInstrInfo *QII = QTM.getInstrInfo(); + int NewOpcode = 0; + switch(MI->getOpcode()) { + case Hexagon::JMP_Pred: + NewOpcode = Hexagon::JMP_PredNot; + break; + + case Hexagon::JMP_PredNot: + NewOpcode = Hexagon::JMP_Pred; + break; + + case Hexagon::JMP_PredPt: + NewOpcode = Hexagon::JMP_PredNotPt; + break; + + case Hexagon::JMP_PredNotPt: + NewOpcode = Hexagon::JMP_PredPt; + break; + + default: + assert(0 && "Cannot handle this case"); + } + + MI->setDesc(QII->get(NewOpcode)); + MI->getOperand(1).setMBB(NewTarget); +} + + +bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + + // Traverse the basic block. + MachineBasicBlock::iterator MII = MBB->getFirstTerminator(); + if (MII != MBB->end()) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (IsConditionalBranch(Opc)) { + + // + // (Case 1) Transform the code if the following condition occurs: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...next block in layout is BB3... + // BB3: ... + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // + // (Case 2) A variation occurs when BB3 contains a JMP to BB4: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...other basic blocks ... + // BB4: + // ...not a fall-thru + // BB3: ... + // jump BB4 + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // BB4: ... 
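+ // Both cases are handled below by inverting the conditional jump and
+ // retargeting it at the unconditional jump's destination; the jump in BB2
+ // is then erased so BB2 falls through, and in case 2 BB3 is moved up to
+ // become the new fall-through block.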
+ // + unsigned NumSuccs = MBB->succ_size(); + MachineBasicBlock::succ_iterator SI = MBB->succ_begin(); + MachineBasicBlock* FirstSucc = *SI; + MachineBasicBlock* SecondSucc = *(++SI); + MachineBasicBlock* LayoutSucc = NULL; + MachineBasicBlock* JumpAroundTarget = NULL; + + if (MBB->isLayoutSuccessor(FirstSucc)) { + LayoutSucc = FirstSucc; + JumpAroundTarget = SecondSucc; + } else if (MBB->isLayoutSuccessor(SecondSucc)) { + LayoutSucc = SecondSucc; + JumpAroundTarget = FirstSucc; + } else { + // Odd case...cannot handle. + } + + // The target of the unconditional branch must be JumpAroundTarget. + // TODO: If not, we should not invert the unconditional branch. + MachineBasicBlock* CondBranchTarget = NULL; + if ((MI->getOpcode() == Hexagon::JMP_Pred) || + (MI->getOpcode() == Hexagon::JMP_PredNot)) { + CondBranchTarget = MI->getOperand(1).getMBB(); + } + + if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) { + continue; + } + + if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) { + + // Ensure that BB2 has one instruction -- an unconditional jump. + if ((LayoutSucc->size() == 1) && + IsUnconditionalJump(LayoutSucc->front().getOpcode())) { + MachineBasicBlock* UncondTarget = + LayoutSucc->front().getOperand(0).getMBB(); + // Check if the layout successor of BB2 is BB3. + bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget); + bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) && + JumpAroundTarget->size() >= 1 && + IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) && + JumpAroundTarget->pred_size() == 1 && + JumpAroundTarget->succ_size() == 1; + + if (case1 || case2) { + InvertAndChangeJumpTarget(MI, UncondTarget); + MBB->removeSuccessor(JumpAroundTarget); + MBB->addSuccessor(UncondTarget); + + // Remove the unconditional branch in LayoutSucc. + LayoutSucc->erase(LayoutSucc->begin()); + LayoutSucc->removeSuccessor(UncondTarget); + LayoutSucc->addSuccessor(JumpAroundTarget); + + // This code performs the conversion for case 2, which moves + // the block to the fall-thru case (BB3 in the code above). + if (case2 && !case1) { + JumpAroundTarget->moveAfter(LayoutSucc); + // only move a block if it doesn't have a fall-thru. otherwise + // the CFG will be incorrect. + if (!UncondTarget->canFallThrough()) { + UncondTarget->moveAfter(JumpAroundTarget); + } + } + + // + // Correct live-in information. Is used by post-RA scheduler + // The live-in to LayoutSucc is now all values live-in to + // JumpAroundTarget. 
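+ // (LayoutSucc now falls through into JumpAroundTarget, so it must carry
+ // JumpAroundTarget's live-ins instead of its old set.)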
+ // + std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(), + LayoutSucc->livein_end()); + std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(), + JumpAroundTarget->livein_end()); + for (unsigned i = 0; i < OrigLiveIn.size(); ++i) { + LayoutSucc->removeLiveIn(OrigLiveIn[i]); + } + for (unsigned i = 0; i < NewLiveIn.size(); ++i) { + LayoutSucc->addLiveIn(NewLiveIn[i]); + } + } + } + } + } + } + } + return true; +} +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) { + return new HexagonCFGOptimizer(TM); +} diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td new file mode 100644 index 0000000..bd9608b --- /dev/null +++ b/lib/Target/Hexagon/HexagonCallingConv.td @@ -0,0 +1,35 @@ +//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the Hexagon architectures. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Return Value Calling Conventions +//===----------------------------------------------------------------------===// + +// Hexagon 32-bit C return-value convention. +def RetCC_Hexagon32 : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; + +// Hexagon 32-bit C Calling convention. +def CC_Hexagon32 : CallingConv<[ + // All arguments get passed in integer registers if there is space. + CCIfType<[i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp new file mode 100644 index 0000000..2e51dbf --- /dev/null +++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -0,0 +1,207 @@ +//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Hexagon_CCState class, used for lowering and +// implementing calling conventions. 
Adapted from the machine independent +// version of the class (CCState) but this handles calls to varargs functions +// +//===----------------------------------------------------------------------===// + +#include "HexagonCallingConvLower.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "Hexagon.h" +using namespace llvm; + +Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg, + const TargetMachine &tm, + SmallVector<CCValAssign, 16> &locs, + LLVMContext &c) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs), Context(c) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. +void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset, + LocVT.getSimpleVT(), LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void Hexagon_CCState::MarkAllocated(unsigned Reg) { + UsedRegs[Reg/32] |= 1 << (Reg&31); + + if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) + for (; (Reg = *RegAliases); ++RegAliases) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// incorporating info about the formals into this state. +void +Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> + &Ins, + Hexagon_CCAssignFn Fn, + unsigned SretValueInRegs) { + unsigned NumArgs = Ins.size(); + unsigned i = 0; + + // If the function returns a small struct in registers, skip + // over the first (dummy) argument. + if (SretValueInRegs != 0) { + ++i; + } + + + for (; i != NumArgs; ++i) { + EVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) { + dbgs() << "Formal argument #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// incorporating info about the result values into this state. +void +Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + Hexagon_CCAssignFn Fn, + unsigned SretValueInRegs) { + + // For Hexagon, Return small structures in registers. + if (SretValueInRegs != 0) { + if (SretValueInRegs <= 32) { + unsigned Reg = Hexagon::R0; + addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32, + CCValAssign::Full)); + return; + } + if (SretValueInRegs <= 64) { + unsigned Reg = Hexagon::D0; + addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64, + CCValAssign::Full)); + return; + } + } + + + // Determine which register each value should be copied into. 
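+ // Each remaining value is handed to the assign function Fn; any value Fn
+ // cannot place is reported as having an unhandled type and lowering aborts.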
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){ + dbgs() << "Return operand #" << i << " has unhandled type " + << VT.getEVTString() << "\n"; + abort(); + } + } +} + + +/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info +/// about the passed values into this state. +void +Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> + &Outs, + Hexagon_CCAssignFn Fn, + int NonVarArgsParams, + unsigned SretValueSize) { + unsigned NumOps = Outs.size(); + + unsigned i = 0; + // If the called function returns a small struct in registers, skip + // the first actual parameter. We do not want to pass a pointer to + // the stack location. + if (SretValueSize != 0) { + ++i; + } + + for (; i != NumOps; ++i) { + EVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, + NonVarArgsParams, i+1, false)) { + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void +Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + Hexagon_CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + EVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1, + false)) { + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// incorporating info about the passed values into this state. +void +Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + Hexagon_CCAssignFn Fn, + unsigned SretValueInRegs) { + + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) { + dbgs() << "Call result #" << i << " has unhandled type " + << VT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1, + false)) { + dbgs() << "Call result has unhandled type " + << VT.getEVTString() << "\n"; + abort(); + } +} diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h new file mode 100644 index 0000000..1f601e8 --- /dev/null +++ b/lib/Target/Hexagon/HexagonCallingConvLower.h @@ -0,0 +1,189 @@ +//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon_CCState class, used for lowering +// and implementing calling conventions. 
Adapted from the target independent +// version but this handles calls to varargs functions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H +#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/CallingConvLower.h" + +// +// Need to handle varargs. +// +namespace llvm { + class TargetRegisterInfo; + class TargetMachine; + class Hexagon_CCState; + class SDNode; + + +/// Hexagon_CCAssignFn - This function assigns a location for Val, updating +/// State to reflect the change. +typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State, + int NonVarArgsParams, + int CurrentParam, + bool ForceMem); + + +/// CCState - This class holds information needed while lowering arguments and +/// return values. It captures which registers are already assigned and which +/// stack slots are used. It provides accessors to allocate these values. +class Hexagon_CCState { + CallingConv::ID CallingConv; + bool IsVarArg; + const TargetMachine &TM; + const TargetRegisterInfo &TRI; + SmallVector<CCValAssign, 16> &Locs; + LLVMContext &Context; + + unsigned StackOffset; + SmallVector<uint32_t, 16> UsedRegs; +public: + Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM, + SmallVector<CCValAssign, 16> &locs, LLVMContext &c); + + void addLoc(const CCValAssign &V) { + Locs.push_back(V); + } + + LLVMContext &getContext() const { return Context; } + const TargetMachine &getTarget() const { return TM; } + unsigned getCallingConv() const { return CallingConv; } + bool isVarArg() const { return IsVarArg; } + + unsigned getNextStackOffset() const { return StackOffset; } + + /// isAllocated - Return true if the specified register (or an alias) is + /// allocated. + bool isAllocated(unsigned Reg) const { + return UsedRegs[Reg/32] & (1 << (Reg&31)); + } + + /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, + /// incorporating info about the formals into this state. + void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); + + /// AnalyzeReturn - Analyze the returned values of an ISD::RET node, + /// incorporating info about the result values into this state. + void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); + + /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info + /// about the passed values into this state. + void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + Hexagon_CCAssignFn Fn, int NonVarArgsParams, + unsigned SretValueSize); + + /// AnalyzeCallOperands - Same as above except it takes vectors of types + /// and argument flags. + void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + Hexagon_CCAssignFn Fn); + + /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, + /// incorporating info about the passed values into this state. + void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); + + /// AnalyzeCallResult - Same as above except it's specialized for calls which + /// produce a single value. 
+ void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn); + + /// getFirstUnallocated - Return the first unallocated register in the set, or + /// NumRegs if they are all allocated. + unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const { + for (unsigned i = 0; i != NumRegs; ++i) + if (!isAllocated(Regs[i])) + return i; + return NumRegs; + } + + /// AllocateReg - Attempt to allocate one register. If it is not available, + /// return zero. Otherwise, return the register, marking it and any aliases + /// as allocated. + unsigned AllocateReg(unsigned Reg) { + if (isAllocated(Reg)) return 0; + MarkAllocated(Reg); + return Reg; + } + + /// Version of AllocateReg with extra register to be shadowed. + unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) { + if (isAllocated(Reg)) return 0; + MarkAllocated(Reg); + MarkAllocated(ShadowReg); + return Reg; + } + + /// AllocateReg - Attempt to allocate one of the specified registers. If none + /// are available, return zero. Otherwise, return the first one available, + /// marking it and any aliases as allocated. + unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) { + unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); + if (FirstUnalloc == NumRegs) + return 0; // Didn't find the reg. + + // Mark the register and any aliases as allocated. + unsigned Reg = Regs[FirstUnalloc]; + MarkAllocated(Reg); + return Reg; + } + + /// Version of AllocateReg with list of registers to be shadowed. + unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs, + unsigned NumRegs) { + unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); + if (FirstUnalloc == NumRegs) + return 0; // Didn't find the reg. + + // Mark the register and any aliases as allocated. + unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc]; + MarkAllocated(Reg); + MarkAllocated(ShadowReg); + return Reg; + } + + /// AllocateStack - Allocate a chunk of stack space with the specified size + /// and alignment. + unsigned AllocateStack(unsigned Size, unsigned Align) { + assert(Align && ((Align-1) & Align) == 0); // Align is power of 2. + StackOffset = ((StackOffset + Align-1) & ~(Align-1)); + unsigned Result = StackOffset; + StackOffset += Size; + return Result; + } + + // HandleByVal - Allocate a stack slot large enough to pass an argument by + // value. The size and alignment information of the argument is encoded in its + // parameter attribute. + void HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags); + +private: + /// MarkAllocated - Mark a register and all of its aliases as allocated. + void MarkAllocated(unsigned Reg); +}; + + + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp new file mode 100644 index 0000000..cb73ae0 --- /dev/null +++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -0,0 +1,184 @@ +//===--- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===//// +// The Hexagon processor has no instructions that load or store predicate +// registers directly. 
So, when these registers must be spilled a general +// purpose register must be found and the value copied to/from it from/to +// the predicate register. This code currently does not use the register +// scavenger mechanism available in the allocator. There are two registers +// reserved to allow spilling/restoring predicate registers. One is used to +// hold the predicate value. The other is used when stack frame offsets are +// too large. +// +//===----------------------------------------------------------------------===// + + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" +#include <map> +#include <iostream> + +#include "llvm/Support/CommandLine.h" + + +using namespace llvm; + + +namespace { + +class HexagonExpandPredSpillCode : public MachineFunctionPass { + HexagonTargetMachine& QTM; + const HexagonSubtarget &QST; + + public: + static char ID; + HexagonExpandPredSpillCode(HexagonTargetMachine& TM) : + MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {} + + const char *getPassName() const { + return "Hexagon Expand Predicate Spill Code"; + } + bool runOnMachineFunction(MachineFunction &Fn); +}; + + +char HexagonExpandPredSpillCode::ID = 0; + + +bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { + + const HexagonInstrInfo *TII = QTM.getInstrInfo(); + const HexagonRegisterInfo *RegInfo = QTM.getRegisterInfo(); + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + // Traverse the basic block. 
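+ // Look for the STriw_pred / LDriw_pred pseudo-instructions and expand each
+ // into a predicate<->GPR transfer plus an ordinary word store or load
+ // through the reserved scratch registers.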
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (Opc == Hexagon::STriw_pred) { + // STriw_pred [R30], ofst, SrcReg; + unsigned FP = MI->getOperand(0).getReg(); + assert(FP == RegInfo->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(1).isImm() && "Not an offset"); + int Offset = MI->getOperand(1).getImm(); + int SrcReg = MI->getOperand(2).getReg(); + assert(Hexagon::PredRegsRegClass.contains(SrcReg) && + "Not a predicate register"); + if (!TII->isValidOffset(Hexagon::STriw, Offset)) { + if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr), + HEXAGON_RESERVED_REG_1) + .addReg(FP).addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::STriw)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0) + .addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)). + addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::LDriw_pred) { + // DstReg = LDriw_pred [R30], ofst. 
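+ // Expansion sketch when the offset is in range: reserved reg 2 is loaded
+ // with memw(FP + #Offset) and then transferred into the predicate register
+ // via TFR_PdRs; out-of-range offsets first build the address in reserved
+ // reg 1.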
+ int DstReg = MI->getOperand(0).getReg(); + assert(Hexagon::PredRegsRegClass.contains(DstReg) && + "Not a predicate register"); + unsigned FP = MI->getOperand(1).getReg(); + assert(FP == RegInfo->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(2).isImm() && "Not an offset"); + int Offset = MI->getOperand(2).getImm(); + if (!TII->isValidOffset(Hexagon::LDriw, Offset)) { + if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr), + HEXAGON_RESERVED_REG_1) + .addReg(FP) + .addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } + } + } + + return true; +} + +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) { + return new HexagonExpandPredSpillCode(TM); +} diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp new file mode 100644 index 0000000..78e0b1c --- /dev/null +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -0,0 +1,333 @@ +//==-- HexagonFrameLowering.cpp - Define frame lowering --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
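+//
+// This file implements Hexagon prologue/epilogue insertion, built around the
+// allocframe / deallocframe instructions, together with callee-saved register
+// spills and restores.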
+// +// +//===----------------------------------------------------------------------===// +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonFrameLowering.h" + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Type.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <iostream> + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Function.h" +using namespace llvm; + +static cl::opt<bool> DisableDeallocRet( + "disable-hexagon-dealloc-ret", + cl::Hidden, + cl::desc("Disable Dealloc Return for Hexagon target")); + +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target. + unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign); + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = RoundUpToAlignment(FrameSize, TargetAlign); + + // Update frame info. + MFI->setStackSize(FrameSize); +} + + +void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + const HexagonRegisterInfo *QRI = + static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo()); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + determineFrameLayout(MF); + + // Check if frame moves are needed for EH. + bool needsFrameMoves = MMI.hasDebugInfo() || + !MF.getFunction()->needsUnwindTableEntry(); + + // Get the number of bytes to allocate from the FrameInfo. + int NumBytes = (int) MFI->getStackSize(); + + // LLVM expects allocframe not to be the first instruction in the + // basic block. + MachineBasicBlock::iterator InsertPt = MBB.begin(); + + // + // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset. 
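+ // Each ADJDYNALLOC pseudo collected in the function info carries an
+ // immediate operand; it is rewritten below to the maximum call frame size,
+ // which is only known once frame layout is complete.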
+ // + HexagonMachineFunctionInfo *FuncInfo = + MF.getInfo<HexagonMachineFunctionInfo>(); + const std::vector<MachineInstr*>& AdjustRegs = + FuncInfo->getAllocaAdjustInsts(); + for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(), + e = AdjustRegs.end(); + i != e; ++i) { + MachineInstr* MI = *i; + assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) && + "Expected adjust alloca node"); + + MachineOperand& MO = MI->getOperand(2); + assert(MO.isImm() && "Expected immediate"); + MO.setImm(MFI->getMaxCallFrameSize()); + } + + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + + if (needsFrameMoves) { + // Advance CFA. DW_CFA_def_cfa + unsigned FPReg = QRI->getFrameRegister(); + unsigned RAReg = QRI->getRARegister(); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(FPReg, -8); + Moves.push_back(MachineMove(0, Dst, Src)); + + // R31 = (R31 - #4) + MachineLocation LRDst(RAReg, -4); + MachineLocation LRSrc(RAReg); + Moves.push_back(MachineMove(0, LRDst, LRSrc)); + + // R30 = (R30 - #8) + MachineLocation SPDst(FPReg, -8); + MachineLocation SPSrc(FPReg); + Moves.push_back(MachineMove(0, SPDst, SPSrc)); + } + + // + // Only insert ALLOCFRAME if we need to. + // + if (hasFP(MF)) { + // Check for overflow. + // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? + const int ALLOCFRAME_MAX = 16384; + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + if (NumBytes >= ALLOCFRAME_MAX) { + // Emit allocframe(#0). + BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0); + + // Subtract offset from frame pointer. + BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(NumBytes); + BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr), + QRI->getStackRegister()). + addReg(QRI->getStackRegister()). + addReg(HEXAGON_RESERVED_REG_1); + } else { + BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes); + } + } +} +// Returns true if MBB has a machine instructions that indicates a tail call +// in the block. +bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + unsigned RetOpcode = MBBI->getOpcode(); + + return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;} + +void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + DebugLoc dl = MBBI->getDebugLoc(); + // + // Only insert deallocframe if we need to. + // + if (hasFP(MF)) { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + MachineBasicBlock::iterator MBBI_end = MBB.end(); + // + // For Hexagon, we don't need the frame size. + // + MachineFrameInfo *MFI = MF.getFrameInfo(); + int NumBytes = (int) MFI->getStackSize(); + + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher + // versions. + if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR + && !DisableDeallocRet) { + // Remove jumpr node. + MBB.erase(MBBI); + // Add dealloc_return. + BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4)) + .addImm(NumBytes); + } else { // Add deallocframe for V2 and V3. 
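+ // deallocframe is inserted ahead of the existing return instruction and
+ // tears down the frame created by allocframe (restoring LR and FP).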
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME)).addImm(NumBytes); + } + } +} + +bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const HexagonMachineFunctionInfo *FuncInfo = + MF.getInfo<HexagonMachineFunctionInfo>(); + return (MFI->hasCalls() || (MFI->getStackSize() > 0) || + FuncInfo->hasClobberLR() ); +} + +bool +HexagonFrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + if (CSI.empty()) { + return false; + } + + // We can only schedule double loads if we spill contiguous callee-saved regs + // For instance, we cannot scheduled double-word loads if we spill r24, + // r26, and r27. + // Hexagon_TODO: We can try to double-word align odd registers for -O2 and + // above. + bool ContiguousRegs = true; + + for (unsigned i = 0; i < CSI.size(); ++i) { + unsigned Reg = CSI[i].getReg(); + + // + // Check if we can use a double-word store. + // + const unsigned* SuperReg = TRI->getSuperRegisters(Reg); + + // Assume that there is exactly one superreg. + assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg"); + bool CanUseDblStore = false; + const TargetRegisterClass* SuperRegClass = 0; + + if (ContiguousRegs && (i < CSI.size()-1)) { + const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); + assert(SuperRegNext[0] && !SuperRegNext[1] && + "Expected exactly one superreg"); + SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]); + CanUseDblStore = (SuperRegNext[0] == SuperReg[0]); + } + + + if (CanUseDblStore) { + TII.storeRegToStackSlot(MBB, MI, SuperReg[0], true, + CSI[i+1].getFrameIdx(), SuperRegClass, TRI); + MBB.addLiveIn(SuperReg[0]); + ++i; + } else { + // Cannot use a double-word store. + ContiguousRegs = false; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC, + TRI); + MBB.addLiveIn(Reg); + } + } + return true; +} + + +bool HexagonFrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + if (CSI.empty()) { + return false; + } + + // We can only schedule double loads if we spill contiguous callee-saved regs + // For instance, we cannot scheduled double-word loads if we spill r24, + // r26, and r27. + // Hexagon_TODO: We can try to double-word align odd registers for -O2 and + // above. + bool ContiguousRegs = true; + + for (unsigned i = 0; i < CSI.size(); ++i) { + unsigned Reg = CSI[i].getReg(); + + // + // Check if we can use a double-word load. + // + const unsigned* SuperReg = TRI->getSuperRegisters(Reg); + const TargetRegisterClass* SuperRegClass = 0; + + // Assume that there is exactly one superreg. 
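+ // (On Hexagon each 32-bit Rn is half of exactly one 64-bit pair Dm, so a
+ // single super-register is expected here.)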
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg"); + bool CanUseDblLoad = false; + if (ContiguousRegs && (i < CSI.size()-1)) { + const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); + assert(SuperRegNext[0] && !SuperRegNext[1] && + "Expected exactly one superreg"); + SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]); + CanUseDblLoad = (SuperRegNext[0] == SuperReg[0]); + } + + + if (CanUseDblLoad) { + TII.loadRegFromStackSlot(MBB, MI, SuperReg[0], CSI[i+1].getFrameIdx(), + SuperRegClass, TRI); + MBB.addLiveIn(SuperReg[0]); + ++i; + } else { + // Cannot use a double-word load. + ContiguousRegs = false; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); + MBB.addLiveIn(Reg); + } + } + return true; +} + +int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + return MF.getFrameInfo()->getObjectOffset(FI); +} diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h new file mode 100644 index 0000000..ad87f11 --- /dev/null +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -0,0 +1,50 @@ +//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGON_FRAMEINFO_H +#define HEXAGON_FRAMEINFO_H + +#include "Hexagon.h" +#include "HexagonSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + +class HexagonFrameLowering : public TargetFrameLowering { +private: + const HexagonSubtarget &STI; + void determineFrameLayout(MachineFunction &MF) const; + +public: + explicit HexagonFrameLowering(const HexagonSubtarget &sti) + : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + virtual bool + spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + virtual bool + restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + bool hasFP(const MachineFunction &MF) const; + bool hasTailCall(MachineBasicBlock &MBB) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp new file mode 100644 index 0000000..c1abc4a --- /dev/null +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -0,0 +1,644 @@ +//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass identifies loops where we can generate the Hexagon hardware +// loop instruction. The hardware loop can perform loop branches with a +// zero-cycle overhead. 
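+// (The loop setup is emitted as LOOP0_i / LOOP0_r and the loop end is marked
+// with ENDLOOP0; see isHardwareLoop() below.)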
+// +// The pattern that defines the induction variable can changed depending on +// prior optimizations. For example, the IndVarSimplify phase run by 'opt' +// normalizes induction variables, and the Loop Strength Reduction pass +// run by 'llc' may also make changes to the induction variable. +// The pattern detected by this phase is due to running Strength Reduction. +// +// Criteria for hardware loops: +// - Countable loops (w/ ind. var for a trip count) +// - Assumes loops are normalized by IndVarSimplify +// - Try inner-most loops first +// - No nested hardware loops. +// - No function calls in loops. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hwloops" +#include "llvm/Constants.h" +#include "llvm/PassSupport.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <algorithm> +#include "Hexagon.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); + +namespace { + class CountValue; + struct HexagonHardwareLoops : public MachineFunctionPass { + MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification, replacement for typeid + + HexagonHardwareLoops() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "Hexagon Hardware Loops"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// getCanonicalInductionVariable - Check to see if the loop has a canonical + /// induction variable. + /// Should be defined in MachineLoop. Based upon version in class Loop. + const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const; + + /// getTripCount - Return a loop-invariant LLVM register indicating the + /// number of times the loop will be executed. If the trip-count cannot + /// be determined, this return null. + CountValue *getTripCount(MachineLoop *L) const; + + /// isInductionOperation - Return true if the instruction matches the + /// pattern for an opertion that defines an induction variable. + bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const; + + /// isInvalidOperation - Return true if the instruction is not valid within + /// a hardware loop. + bool isInvalidLoopOperation(const MachineInstr *MI) const; + + /// containsInavlidInstruction - Return true if the loop contains an + /// instruction that inhibits using the hardware loop. + bool containsInvalidInstruction(MachineLoop *L) const; + + /// converToHardwareLoop - Given a loop, check if we can convert it to a + /// hardware loop. If so, then perform the conversion and return true. 
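+    /// Nested loops are visited first; if an inner loop was converted, the
+    /// enclosing loop is left alone, since hardware loops cannot be nested.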
+ bool convertToHardwareLoop(MachineLoop *L); + + }; + + char HexagonHardwareLoops::ID = 0; + + + // CountValue class - Abstraction for a trip count of a loop. A + // smaller vesrsion of the MachineOperand class without the concerns + // of changing the operand representation. + class CountValue { + public: + enum CountValueType { + CV_Register, + CV_Immediate + }; + private: + CountValueType Kind; + union Values { + unsigned RegNum; + int64_t ImmVal; + Values(unsigned r) : RegNum(r) {} + Values(int64_t i) : ImmVal(i) {} + } Contents; + bool isNegative; + + public: + CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r), + isNegative(neg) {} + explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i), + isNegative(i < 0) {} + CountValueType getType() const { return Kind; } + bool isReg() const { return Kind == CV_Register; } + bool isImm() const { return Kind == CV_Immediate; } + bool isNeg() const { return isNegative; } + + unsigned getReg() const { + assert(isReg() && "Wrong CountValue accessor"); + return Contents.RegNum; + } + void setReg(unsigned Val) { + Contents.RegNum = Val; + } + int64_t getImm() const { + assert(isImm() && "Wrong CountValue accessor"); + if (isNegative) { + return -Contents.ImmVal; + } + return Contents.ImmVal; + } + void setImm(int64_t Val) { + Contents.ImmVal = Val; + } + + void print(raw_ostream &OS, const TargetMachine *TM = 0) const { + if (isReg()) { OS << PrintReg(getReg()); } + if (isImm()) { OS << getImm(); } + } + }; + + struct HexagonFixupHwLoops : public MachineFunctionPass { + public: + static char ID; // Pass identification, replacement for typeid. + + HexagonFixupHwLoops() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// Maximum distance between the loop instr and the basic block. + /// Just an estimate. + static const unsigned MAX_LOOP_DISTANCE = 200; + + /// fixupLoopInstrs - Check the offset between each loop instruction and + /// the loop basic block to determine if we can use the LOOP instruction + /// or if we need to set the LC/SA registers explicitly. + bool fixupLoopInstrs(MachineFunction &MF); + + /// convertLoopInstr - Add the instruction to set the LC and SA registers + /// explicitly. + void convertLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII, + RegScavenger &RS); + + }; + + char HexagonFixupHwLoops::ID = 0; + +} // end anonymous namespace + + +/// isHardwareLoop - Returns true if the instruction is a hardware loop +/// instruction. +static bool isHardwareLoop(const MachineInstr *MI) { + return MI->getOpcode() == Hexagon::LOOP0_r || + MI->getOpcode() == Hexagon::LOOP0_i; +} + +/// isCompareEquals - Returns true if the instruction is a compare equals +/// instruction with an immediate operand. +static bool isCompareEqualsImm(const MachineInstr *MI) { + return MI->getOpcode() == Hexagon::CMPEQri; +} + + +/// createHexagonHardwareLoops - Factory for creating +/// the hardware loop phase. 
+FunctionPass *llvm::createHexagonHardwareLoops() { + return new HexagonHardwareLoops(); +} + + +bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); + + bool Changed = false; + + // get the loop information + MLI = &getAnalysis<MachineLoopInfo>(); + // get the register information + MRI = &MF.getRegInfo(); + // the target specific instructio info. + TII = MF.getTarget().getInstrInfo(); + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); + I != E; ++I) { + MachineLoop *L = *I; + if (!L->getParentLoop()) { + Changed |= convertToHardwareLoop(L); + } + } + + return Changed; +} + +/// getCanonicalInductionVariable - Check to see if the loop has a canonical +/// induction variable. We check for a simple recurrence pattern - an +/// integer recurrence that decrements by one each time through the loop and +/// ends at zero. If so, return the phi node that corresponds to it. +/// +/// Based upon the similar code in LoopInfo except this code is specific to +/// the machine. +/// This method assumes that the IndVarSimplify pass has been run by 'opt'. +/// +const MachineInstr +*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const { + MachineBasicBlock *TopMBB = L->getTopBlock(); + MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); + assert(PI != TopMBB->pred_end() && + "Loop must have more than one incoming edge!"); + MachineBasicBlock *Backedge = *PI++; + if (PI == TopMBB->pred_end()) return 0; // dead loop + MachineBasicBlock *Incoming = *PI++; + if (PI != TopMBB->pred_end()) return 0; // multiple backedges? + + // make sure there is one incoming and one backedge and determine which + // is which. + if (L->contains(Incoming)) { + if (L->contains(Backedge)) + return 0; + std::swap(Incoming, Backedge); + } else if (!L->contains(Backedge)) + return 0; + + // Loop over all of the PHI nodes, looking for a canonical induction variable: + // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". + // - The recurrence comes from the backedge. + // - the definition is an induction operatio.n + for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end(); + I != E && I->isPHI(); ++I) { + const MachineInstr *MPhi = &*I; + unsigned DefReg = MPhi->getOperand(0).getReg(); + for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { + // Check each operand for the value from the backedge. + MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB(); + if (L->contains(MBB)) { // operands comes from the backedge + // Check if the definition is an induction operation. + const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); + if (isInductionOperation(DI, DefReg)) { + return MPhi; + } + } + } + } + return 0; +} + +/// getTripCount - Return a loop-invariant LLVM value indicating the +/// number of times the loop will be executed. The trip count can +/// be either a register or a constant value. If the trip-count +/// cannot be determined, this returns null. +/// +/// We find the trip count from the phi instruction that defines the +/// induction variable. We follow the links to the CMP instruction +/// to get the trip count. +/// +/// Based upon getTripCount in LoopInfo. +/// +CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const { + // Check that the loop has a induction variable. 
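+  // Without a canonical induction variable there is no way to recover a
+  // loop-invariant trip count, so give up immediately.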
+ const MachineInstr *IV_Inst = getCanonicalInductionVariable(L); + if (IV_Inst == 0) return 0; + + // Canonical loops will end with a 'cmpeq_ri IV, Imm', + // if Imm is 0, get the count from the PHI opnd + // if Imm is -M, than M is the count + // Otherwise, Imm is the count + const MachineOperand *IV_Opnd; + const MachineOperand *InitialValue; + if (!L->contains(IV_Inst->getOperand(2).getMBB())) { + InitialValue = &IV_Inst->getOperand(1); + IV_Opnd = &IV_Inst->getOperand(3); + } else { + InitialValue = &IV_Inst->getOperand(3); + IV_Opnd = &IV_Inst->getOperand(1); + } + + // Look for the cmp instruction to determine if we + // can get a useful trip count. The trip count can + // be either a register or an immediate. The location + // of the value depends upon the type (reg or imm). + while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + const MachineInstr *MI = IV_Opnd->getParent(); + if (L->contains(MI) && isCompareEqualsImm(MI)) { + const MachineOperand &MO = MI->getOperand(2); + assert(MO.isImm() && "IV Cmp Operand should be 0"); + int64_t ImmVal = MO.getImm(); + + const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); + assert(L->contains(IV_DefInstr->getParent()) && + "IV definition should occurs in loop"); + int64_t iv_value = IV_DefInstr->getOperand(2).getImm(); + + if (ImmVal == 0) { + // Make sure the induction variable changes by one on each iteration. + if (iv_value != 1 && iv_value != -1) { + return 0; + } + return new CountValue(InitialValue->getReg(), iv_value > 0); + } else { + assert(InitialValue->isReg() && "Expecting register for init value"); + const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg()); + if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) { + int64_t count = ImmVal - DefInstr->getOperand(1).getImm(); + if ((count % iv_value) != 0) { + return 0; + } + return new CountValue(count/iv_value); + } + } + } + } + return 0; +} + +/// isInductionOperation - return true if the operation is matches the +/// pattern that defines an induction variable: +/// add iv, c +/// +bool +HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI, + unsigned IVReg) const { + return (MI->getOpcode() == + Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg); +} + +/// isInvalidOperation - Return true if the operation is invalid within +/// hardware loop. +bool +HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const { + + // call is not allowed because the callee may use a hardware loop + if (MI->getDesc().isCall()) { + return true; + } + // do not allow nested hardware loops + if (isHardwareLoop(MI)) { + return true; + } + // check if the instruction defines a hardware loop register + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && + (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 || + MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA0)) { + return true; + } + } + return false; +} + +/// containsInvalidInstruction - Return true if the loop contains +/// an instruction that inhibits the use of the hardware loop function. 
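+/// Calls, already-generated hardware loops, and any definition of the loop
+/// registers (LC0/LC1/SA0) disqualify the loop.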
+/// +bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { + const std::vector<MachineBasicBlock*> Blocks = L->getBlocks(); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *MBB = Blocks[i]; + for (MachineBasicBlock::iterator + MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { + const MachineInstr *MI = &*MII; + if (isInvalidLoopOperation(MI)) { + return true; + } + } + } + return false; +} + +/// converToHardwareLoop - check if the loop is a candidate for +/// converting to a hardware loop. If so, then perform the +/// transformation. +/// +/// This function works on innermost loops first. A loop can +/// be converted if it is a counting loop; either a register +/// value or an immediate. +/// +/// The code makes several assumptions about the representation +/// of the loop in llvm. +bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { + bool Changed = false; + // Process nested loops first. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + Changed |= convertToHardwareLoop(*I); + } + // If a nested loop has been converted, then we can't convert this loop. + if (Changed) { + return Changed; + } + // Are we able to determine the trip count for the loop? + CountValue *TripCount = getTripCount(L); + if (TripCount == 0) { + return false; + } + // Does the loop contain any invalid instructions? + if (containsInvalidInstruction(L)) { + return false; + } + MachineBasicBlock *Preheader = L->getLoopPreheader(); + // No preheader means there's not place for the loop instr. + if (Preheader == 0) { + return false; + } + MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); + + MachineBasicBlock *LastMBB = L->getExitingBlock(); + // Don't generate hw loop if the loop has more than one exit. + if (LastMBB == 0) { + return false; + } + MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); + + // Determine the loop start. + MachineBasicBlock *LoopStart = L->getTopBlock(); + if (L->getLoopLatch() != LastMBB) { + // When the exit and latch are not the same, use the latch block as the + // start. + // The loop start address is used only after the 1st iteration, and the loop + // latch may contains instrs. that need to be executed after the 1st iter. + LoopStart = L->getLoopLatch(); + // Make sure the latch is a successor of the exit, otherwise it won't work. + if (!LastMBB->isSuccessor(LoopStart)) { + return false; + } + } + + // Convert the loop to a hardware loop + DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); + + if (TripCount->isReg()) { + // Create a copy of the loop count register. + MachineFunction *MF = LastMBB->getParent(); + const TargetRegisterClass *RC = + MF->getRegInfo().getRegClass(TripCount->getReg()); + unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC); + BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), + TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg()); + if (TripCount->isNeg()) { + unsigned CountReg1 = CountReg; + CountReg = MF->getRegInfo().createVirtualRegister(RC); + BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), + TII->get(Hexagon::NEG), CountReg).addReg(CountReg1); + } + + // Add the Loop instruction to the begining of the loop. 
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), + TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg); + } else { + assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); + // Add the Loop immediate instruction to the beginning of the loop. + int64_t CountImm = TripCount->getImm(); + BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), + TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm); + } + + // Make sure the loop start always has a reference in the CFG. We need to + // create a BlockAddress operand to get this mechanism to work both the + // MachineBasicBlock and BasicBlock objects need the flag set. + LoopStart->setHasAddressTaken(); + // This line is needed to set the hasAddressTaken flag on the BasicBlock + // object + BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); + + // Replace the loop branch with an endloop instruction. + DebugLoc dl = LastI->getDebugLoc(); + BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart); + + // The loop ends with either: + // - a conditional branch followed by an unconditional branch, or + // - a conditional branch to the loop start. + if (LastI->getOpcode() == Hexagon::JMP_Pred || + LastI->getOpcode() == Hexagon::JMP_PredNot) { + // delete one and change/add an uncond. branch to out of the loop + MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); + LastI = LastMBB->erase(LastI); + if (!L->contains(BranchTarget)) { + if (LastI != LastMBB->end()) { + TII->RemoveBranch(*LastMBB); + } + SmallVector<MachineOperand, 0> Cond; + TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl); + } + } else { + // Conditional branch to loop start; just delete it. + LastMBB->erase(LastI); + } + delete TripCount; + + ++NumHWLoops; + return true; +} + +/// createHexagonFixupHwLoops - Factory for creating the hardware loop +/// phase. +FunctionPass *llvm::createHexagonFixupHwLoops() { + return new HexagonFixupHwLoops(); +} + +bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n"); + + bool Changed = fixupLoopInstrs(MF); + return Changed; +} + +/// fixupLoopInsts - For Hexagon, if the loop label is to far from the +/// loop instruction then we need to set the LC0 and SA0 registers +/// explicitly instead of using LOOP(start,count). This function +/// checks the distance, and generates register assignments if needed. +/// +/// This function makes two passes over the basic blocks. The first +/// pass computes the offset of the basic block from the start. +/// The second pass checks all the loop instructions. +bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { + + // Offset of the current instruction from the start. + unsigned InstOffset = 0; + // Map for each basic block to it's first instruction. + DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset; + + // First pass - compute the offset of each basic block. + for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); + MBB != MBBe; ++MBB) { + BlockToInstOffset[MBB] = InstOffset; + InstOffset += (MBB->size() * 4); + } + + // Second pass - check each loop instruction to see if it needs to + // be converted. + InstOffset = 0; + bool Changed = false; + RegScavenger RS; + + // Loop over all the basic blocks. + for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); + MBB != MBBe; ++MBB) { + InstOffset = BlockToInstOffset[MBB]; + RS.enterBasicBlock(MBB); + + // Loop over all the instructions. 
+ MachineBasicBlock::iterator MIE = MBB->end(); + MachineBasicBlock::iterator MII = MBB->begin(); + while (MII != MIE) { + if (isHardwareLoop(MII)) { + RS.forward(MII); + assert(MII->getOperand(0).isMBB() && + "Expect a basic block as loop operand"); + int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; + diff = (diff > 0 ? diff : -diff); + if ((unsigned)diff > MAX_LOOP_DISTANCE) { + // Convert to explicity setting LC0 and SA0. + convertLoopInstr(MF, MII, RS); + MII = MBB->erase(MII); + Changed = true; + } else { + ++MII; + } + } else { + ++MII; + } + InstOffset += 4; + } + } + + return Changed; + +} + +/// convertLoopInstr - convert a loop instruction to a sequence of instructions +/// that set the lc and sa register explicitly. +void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII, + RegScavenger &RS) { + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + MachineBasicBlock *MBB = MII->getParent(); + DebugLoc DL = MII->getDebugLoc(); + unsigned Scratch = RS.scavengeRegister(Hexagon::IntRegsRegisterClass, MII, 0); + + // First, set the LC0 with the trip count. + if (MII->getOperand(1).isReg()) { + // Trip count is a register + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) + .addReg(MII->getOperand(1).getReg()); + } else { + // Trip count is an immediate. + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch) + .addImm(MII->getOperand(1).getImm()); + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) + .addReg(Scratch); + } + // Then, set the SA0 with the loop start address. + BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch) + .addMBB(MII->getOperand(0).getMBB()); + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch); +} diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp new file mode 100644 index 0000000..4deab9f --- /dev/null +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -0,0 +1,1495 @@ +//==-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon ----==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the Hexagon target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-isel" +#include "HexagonISelLowering.h" +#include "HexagonTargetMachine.h" +#include "llvm/Intrinsics.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------===// +/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine +/// instructions for SelectionDAG operations. +/// +namespace { +class HexagonDAGToDAGISel : public SelectionDAGISel { + /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can + /// make the right decision when generating code for different targets. + const HexagonSubtarget &Subtarget; + + // Keep a reference to HexagonTargetMachine. 
+ HexagonTargetMachine& TM; + const HexagonInstrInfo *TII; + +public: + explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine) + : SelectionDAGISel(targetmachine), + Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()), + TM(targetmachine), + TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) { + + } + + SDNode *Select(SDNode *N); + + // Complex Pattern Selectors. + bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2); + + virtual const char *getPassName() const { + return "Hexagon DAG->DAG Pattern Instruction Selection"; + } + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); + bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); + + SDNode *SelectLoad(SDNode *N); + SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl); + SDNode *SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl); + SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode, + DebugLoc dl); + SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode, + DebugLoc dl); + SDNode *SelectBaseOffsetStore(StoreSDNode *ST, DebugLoc dl); + SDNode *SelectIndexedStore(StoreSDNode *ST, DebugLoc dl); + SDNode *SelectStore(SDNode *N); + SDNode *SelectSHL(SDNode *N); + SDNode *SelectSelect(SDNode *N); + SDNode *SelectTruncate(SDNode *N); + SDNode *SelectMul(SDNode *N); + SDNode *SelectZeroExtend(SDNode *N); + SDNode *SelectIntrinsicWOChain(SDNode *N); + SDNode *SelectConstant(SDNode *N); + SDNode *SelectAdd(SDNode *N); + + // Include the pieces autogenerated from the target description. +#include "HexagonGenDAGISel.inc" +}; +} // end anonymous namespace + + +/// createHexagonISelDag - This pass converts a legalized DAG into a +/// Hexagon-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) { + return new HexagonDAGToDAGISel(TM); +} + +static bool IsS11_0_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<11>(v); +} + + +static bool IsS11_1_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,1>(v); +} + + +static bool IsS11_2_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. 
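+  // The check below actually accepts a multiple of 4 whose scaled value fits
+  // in a signed 11-bit field.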
+ int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,2>(v); +} + + +static bool IsS11_3_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,3>(v); +} + + +static bool IsU6_0_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // u6 predicate - True if the immediate fits in a 6-bit unsigned extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<6>(v); +} + + +static bool IsU6_1_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // u6 predicate - True if the immediate fits in a 6-bit unsigned extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,1>(v); +} + + +static bool IsU6_2_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // u6 predicate - True if the immediate fits in a 6-bit unsigned extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,2>(v); +} + + +// Intrinsics that return a a predicate. +static unsigned doesIntrinsicReturnPredicate(unsigned ID) +{ + switch (ID) { + default: + return 0; + case Intrinsic::hexagon_C2_cmpeq: + case Intrinsic::hexagon_C2_cmpgt: + case Intrinsic::hexagon_C2_cmpgtu: + case Intrinsic::hexagon_C2_cmpgtup: + case Intrinsic::hexagon_C2_cmpgtp: + case Intrinsic::hexagon_C2_cmpeqp: + case Intrinsic::hexagon_C2_bitsset: + case Intrinsic::hexagon_C2_bitsclr: + case Intrinsic::hexagon_C2_cmpeqi: + case Intrinsic::hexagon_C2_cmpgti: + case Intrinsic::hexagon_C2_cmpgtui: + case Intrinsic::hexagon_C2_cmpgei: + case Intrinsic::hexagon_C2_cmpgeui: + case Intrinsic::hexagon_C2_cmplt: + case Intrinsic::hexagon_C2_cmpltu: + case Intrinsic::hexagon_C2_bitsclri: + case Intrinsic::hexagon_C2_and: + case Intrinsic::hexagon_C2_or: + case Intrinsic::hexagon_C2_xor: + case Intrinsic::hexagon_C2_andn: + case Intrinsic::hexagon_C2_not: + case Intrinsic::hexagon_C2_orn: + case Intrinsic::hexagon_C2_pxfer_map: + case Intrinsic::hexagon_C2_any8: + case Intrinsic::hexagon_C2_all8: + case Intrinsic::hexagon_A2_vcmpbeq: + case Intrinsic::hexagon_A2_vcmpbgtu: + case Intrinsic::hexagon_A2_vcmpheq: + case Intrinsic::hexagon_A2_vcmphgt: + case Intrinsic::hexagon_A2_vcmphgtu: + case Intrinsic::hexagon_A2_vcmpweq: + case Intrinsic::hexagon_A2_vcmpwgt: + case Intrinsic::hexagon_A2_vcmpwgtu: + case Intrinsic::hexagon_C2_tfrrp: + case Intrinsic::hexagon_S2_tstbit_i: + case Intrinsic::hexagon_S2_tstbit_r: + return 1; + } +} + + +// Intrinsics that have predicate operands. +static unsigned doesIntrinsicContainPredicate(unsigned ID) +{ + switch (ID) { + default: + return 0; + case Intrinsic::hexagon_C2_tfrpr: + return Hexagon::TFR_RsPd; + case Intrinsic::hexagon_C2_and: + return Hexagon::AND_pp; + case Intrinsic::hexagon_C2_xor: + return Hexagon::XOR_pp; + case Intrinsic::hexagon_C2_or: + return Hexagon::OR_pp; + case Intrinsic::hexagon_C2_not: + return Hexagon::NOT_pp; + case Intrinsic::hexagon_C2_any8: + return Hexagon::ANY_pp; + case Intrinsic::hexagon_C2_all8: + return Hexagon::ALL_pp; + case Intrinsic::hexagon_C2_vitpack: + return Hexagon::VITPACK_pp; + case Intrinsic::hexagon_C2_mask: + return Hexagon::MASK_p; + case Intrinsic::hexagon_C2_mux: + return Hexagon::MUX_rr; + + // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but + // that's how it's mapped in q6protos.h. 
+ case Intrinsic::hexagon_C2_muxir: + return Hexagon::MUX_ri; + + // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but + // that's how it's mapped in q6protos.h. + case Intrinsic::hexagon_C2_muxri: + return Hexagon::MUX_ir; + + case Intrinsic::hexagon_C2_muxii: + return Hexagon::MUX_ii; + case Intrinsic::hexagon_C2_vmux: + return Hexagon::VMUX_prr64; + case Intrinsic::hexagon_S2_valignrb: + return Hexagon::VALIGN_rrp; + case Intrinsic::hexagon_S2_vsplicerb: + return Hexagon::VSPLICE_rrp; + } +} + + +static bool OffsetFitsS11(EVT MemType, int64_t Offset) { + if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) { + return true; + } + if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) { + return true; + } + if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) { + return true; + } + if (MemType == MVT::i8 && isInt<11>(Offset)) { + return true; + } + return false; +} + + +// +// Try to lower loads of GlobalAdresses into base+offset loads. Custom +// lowering for GlobalAddress nodes has already turned it into a +// CONST32. +// +SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) { + EVT LoadedVT = LD->getMemoryVT(); + SDValue Chain = LD->getChain(); + SDNode* Const32 = LD->getBasePtr().getNode(); + unsigned Opcode = 0; + + if (Const32->getOpcode() == HexagonISD::CONST32 && + ISD::isNormalLoad(LD)) { + SDValue Base = Const32->getOperand(0); + EVT LoadedVT = LD->getMemoryVT(); + int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset(); + if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) { + MVT PointerTy = TLI.getPointerTy(); + const GlobalValue* GV = + cast<GlobalAddressSDNode>(Base)->getGlobal(); + SDValue TargAddr = + CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0); + SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set, + dl, PointerTy, + TargAddr); + // Figure out base + offset opcode + if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed; + else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed; + else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed; + else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed; + else assert (0 && "unknown memory type"); + + // Build indexed load. 
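+      // The CONST32_set of the global becomes the base register and the
+      // global's offset is folded into the immediate operand.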
+ SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy); + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::Other, + SDValue(NewBase,0), + TargetConstOff, + Chain); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + ReplaceUses(LD, Result); + return Result; + } + } + + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, + unsigned Opcode, + DebugLoc dl) +{ + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + SDValue N1 = LD->getOperand(1); + SDValue CPTmpN1_0; + SDValue CPTmpN1_1; + if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && + N1.getNode()->getValueType(0) == MVT::i32) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Base, TargetConst, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64, + SDValue(Result_1, 0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) + }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Base, TargetConst0, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, + MVT::i64, SDValue(Result_1, 0)); + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, + MVT::i32, Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_3, 0), + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, + unsigned Opcode, + DebugLoc dl) +{ + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + SDValue N1 = LD->getOperand(1); + SDValue CPTmpN1_0; + SDValue CPTmpN1_1; + if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && + N1.getNode()->getValueType(0) == MVT::i32) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + 
TargetConst0); + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + MVT::i64, MVT::Other, + SDValue(Result_2,0), + SDValue(Result_1,0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_3, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) + }; + ReplaceUses(Froms, Tos, 3); + return Result_3; + } + + // Generate an indirect load. + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, + Base, TargetConst0, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + TargetConst0); + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + MVT::i64, MVT::Other, + SDValue(Result_2,0), + SDValue(Result_1,0)); + // Add offset to base. + SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_3, 0), // Load value. + SDValue(Result_4, 0), // New address. + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_3; + } + + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT LoadedVT = LD->getMemoryVT(); + unsigned Opcode = 0; + + // Check for zero ext loads. + bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD); + + // Figure out the opcode. + if (LoadedVT == MVT::i64) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::POST_LDrid; + else + Opcode = Hexagon::LDrid; + } else if (LoadedVT == MVT::i32) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::POST_LDriw; + else + Opcode = Hexagon::LDriw; + } else if (LoadedVT == MVT::i16) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih; + else + Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih; + } else if (LoadedVT == MVT::i8) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib; + else + Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib; + } else + assert (0 && "unknown memory type"); + + // For zero ext i64 loads, we need to add combine instructions. + if (LD->getValueType(0) == MVT::i64 && + LD->getExtensionType() == ISD::ZEXTLOAD) { + return SelectIndexedLoadZeroExtend64(LD, Opcode, dl); + } + if (LD->getValueType(0) == MVT::i64 && + LD->getExtensionType() == ISD::SEXTLOAD) { + // Handle sign ext i64 loads. 
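+    // The 32-bit load is widened to 64 bits with an explicit SXTW.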
+ return SelectIndexedLoadSignExtend64(LD, Opcode, dl); + } + if (TII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Result, 1), + SDValue(Result, 2) + }; + ReplaceUses(Froms, Tos, 3); + return Result; + } else { + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::Other, Base, TargetConst0, + Chain); + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_1, 0), + SDValue(Result_2, 0), + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_1; + } + + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) { + SDNode *result; + DebugLoc dl = N->getDebugLoc(); + LoadSDNode *LD = cast<LoadSDNode>(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + + // Handle indexed loads. + if (AM != ISD::UNINDEXED) { + result = SelectIndexedLoad(LD, dl); + } else { + result = SelectBaseOffsetLoad(LD, dl); + } + + return result; +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { + SDValue Chain = ST->getChain(); + SDValue Base = ST->getBasePtr(); + SDValue Offset = ST->getOffset(); + SDValue Value = ST->getValue(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT StoredVT = ST->getMemoryVT(); + + // Offset value must be within representable range + // and must have correct alignment properties. + if (TII->isValidAutoIncImm(StoredVT, Val)) { + SDValue Ops[] = { Value, Base, + CurDAG->getTargetConstant(Val, MVT::i32), Chain}; + unsigned Opcode = 0; + + // Figure out the post inc version of opcode. + if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri; + else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri; + else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri; + else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri; + else assert (0 && "unknown memory type"); + + // Build post increment store. + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Ops, 4); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = ST->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + + ReplaceUses(ST, Result); + ReplaceUses(SDValue(ST,1), SDValue(Result,1)); + return Result; + } + + // Note: Order of operands matches the def of instruction: + // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ... + // and it differs for POST_ST* for instance. 
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value, + Chain}; + unsigned Opcode = 0; + + // Figure out the opcode. + if (StoredVT == MVT::i64) Opcode = Hexagon::STrid; + else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw; + else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih; + else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib; + else assert (0 && "unknown memory type"); + + // Build regular store. + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, + 4); + // Build splitted incriment instruction. + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + Base, + TargetConstVal, + SDValue(Result_1, 0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = ST->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + + ReplaceUses(SDValue(ST,0), SDValue(Result_2,0)); + ReplaceUses(SDValue(ST,1), SDValue(Result_1,0)); + return Result_2; +} + + +SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST, + DebugLoc dl) { + SDValue Chain = ST->getChain(); + SDNode* Const32 = ST->getBasePtr().getNode(); + SDValue Value = ST->getValue(); + unsigned Opcode = 0; + + // Try to lower stores of GlobalAdresses into indexed stores. Custom + // lowering for GlobalAddress nodes has already turned it into a + // CONST32. Avoid truncating stores for the moment. Post-inc stores + // do the same. Don't think there's a reason for it, so will file a + // bug to fix. + if ((Const32->getOpcode() == HexagonISD::CONST32) && + !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) { + SDValue Base = Const32->getOperand(0); + if (Base.getOpcode() == ISD::TargetGlobalAddress) { + EVT StoredVT = ST->getMemoryVT(); + int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset(); + if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) { + MVT PointerTy = TLI.getPointerTy(); + const GlobalValue* GV = + cast<GlobalAddressSDNode>(Base)->getGlobal(); + SDValue TargAddr = + CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0); + SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set, + dl, PointerTy, + TargAddr); + + // Figure out base + offset opcode + if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed; + else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed; + else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed; + else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed; + else assert (0 && "unknown memory type"); + + SDValue Ops[] = {SDValue(NewBase,0), + CurDAG->getTargetConstant(Offset,PointerTy), + Value, Chain}; + // build indexed store + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + MVT::Other, Ops, 4); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = ST->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + ReplaceUses(ST, Result); + return Result; + } + } + } + + return SelectCode(ST); +} + + +SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(N); + ISD::MemIndexedMode AM = ST->getAddressingMode(); + + // Handle indexed stores. 
+ if (AM != ISD::UNINDEXED) { + return SelectIndexedStore(ST, dl); + } + + return SelectBaseOffsetStore(ST, dl); +} + +SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + + // + // %conv.i = sext i32 %tmp1 to i64 + // %conv2.i = sext i32 %add to i64 + // %mul.i = mul nsw i64 %conv2.i, %conv.i + // + // --- match with the following --- + // + // %mul.i = mpy (%tmp1, %add) + // + + if (N->getValueType(0) == MVT::i64) { + // Shifting a i64 signed multiply. + SDValue MulOp0 = N->getOperand(0); + SDValue MulOp1 = N->getOperand(1); + + SDValue OP0; + SDValue OP1; + + // Handle sign_extend and sextload. + if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext0 = MulOp0.getOperand(0); + if (Sext0.getNode()->getValueType(0) != MVT::i32) { + SelectCode(N); + } + + OP0 = Sext0; + } else if (MulOp0.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + SelectCode(N); + } + + SDValue Base = LD->getBasePtr(); + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), TargetConst0, + Chain), 0); + } else { + return SelectCode(N); + } + + // Same goes for the second operand. + if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext1 = MulOp1.getOperand(0); + if (Sext1.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP1 = Sext1; + } else if (MulOp1.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Base = LD->getBasePtr(); + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), TargetConst0, + Chain), 0); + } else { + return SelectCode(N); + } + + // Generate a mpy instruction. + SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64, + OP0, OP1); + ReplaceUses(N, Result); + return Result; + } + + return SelectCode(N); +} + + +SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() == ISD::SETCC) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) { + SDValue N000 = N00.getOperand(0); + SDValue N001 = N00.getOperand(1); + if (cast<VTSDNode>(N001)->getVT() == MVT::i16) { + SDValue N01 = N0.getOperand(1); + SDValue N02 = N0.getOperand(2); + + // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, + // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1, + // IntRegs:i32:$src2) + // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) + // Pattern complexity = 9 cost = 1 size = 0. 
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) { + SDValue N1 = N->getOperand(1); + if (N01 == N1) { + SDValue N2 = N->getOperand(2); + if (N000 == N2 && + N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && + N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { + SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl, + MVT::i32, N000); + SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl, + MVT::i32, + SDValue(SextNode, 0), + N1); + ReplaceUses(N, Result); + return Result; + } + } + } + + // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, + // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1, + // IntRegs:i32:$src2) + // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) + // Pattern complexity = 9 cost = 1 size = 0. + if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) { + SDValue N1 = N->getOperand(1); + if (N01 == N1) { + SDValue N2 = N->getOperand(2); + if (N000 == N2 && + N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && + N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { + SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl, + MVT::i32, N000); + SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl, + MVT::i32, + SDValue(SextNode, 0), + N1); + ReplaceUses(N, Result); + return Result; + } + } + } + } + } + } + + return SelectCode(N); +} + + +SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue Shift = N->getOperand(0); + + // + // %conv.i = sext i32 %tmp1 to i64 + // %conv2.i = sext i32 %add to i64 + // %mul.i = mul nsw i64 %conv2.i, %conv.i + // %shr5.i = lshr i64 %mul.i, 32 + // %conv3.i = trunc i64 %shr5.i to i32 + // + // --- match with the following --- + // + // %conv3.i = mpy (%tmp1, %add) + // + // Trunc to i32. + if (N->getValueType(0) == MVT::i32) { + // Trunc from i64. + if (Shift.getNode()->getValueType(0) == MVT::i64) { + // Trunc child is logical shift right. + if (Shift.getOpcode() != ISD::SRL) { + return SelectCode(N); + } + + SDValue ShiftOp0 = Shift.getOperand(0); + SDValue ShiftOp1 = Shift.getOperand(1); + + // Shift by const 32 + if (ShiftOp1.getOpcode() != ISD::Constant) { + return SelectCode(N); + } + + int32_t ShiftConst = + cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue(); + if (ShiftConst != 32) { + return SelectCode(N); + } + + // Shifting a i64 signed multiply + SDValue Mul = ShiftOp0; + if (Mul.getOpcode() != ISD::MUL) { + return SelectCode(N); + } + + SDValue MulOp0 = Mul.getOperand(0); + SDValue MulOp1 = Mul.getOperand(1); + + SDValue OP0; + SDValue OP1; + + // Handle sign_extend and sextload + if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext0 = MulOp0.getOperand(0); + if (Sext0.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP0 = Sext0; + } else if (MulOp0.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Base = LD->getBasePtr(); + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), + TargetConst0, Chain), 0); + } else { + return SelectCode(N); + } + + // Same goes for the second operand. 
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext1 = MulOp1.getOperand(0); + if (Sext1.getNode()->getValueType(0) != MVT::i32) + return SelectCode(N); + + OP1 = Sext1; + } else if (MulOp1.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Base = LD->getBasePtr(); + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), + TargetConst0, Chain), 0); + } else { + return SelectCode(N); + } + + // Generate a mpy instruction. + SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32, + OP0, OP1); + ReplaceUses(N, Result); + return Result; + } + } + + return SelectCode(N); +} + + +SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0) == MVT::i32) { + SDValue Shl_0 = N->getOperand(0); + SDValue Shl_1 = N->getOperand(1); + // RHS is const. + if (Shl_1.getOpcode() == ISD::Constant) { + if (Shl_0.getOpcode() == ISD::MUL) { + SDValue Mul_0 = Shl_0.getOperand(0); // Val + SDValue Mul_1 = Shl_0.getOperand(1); // Const + // RHS of mul is const. + if (Mul_1.getOpcode() == ISD::Constant) { + int32_t ShlConst = + cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue(); + int32_t MulConst = + cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue(); + int32_t ValConst = MulConst << ShlConst; + SDValue Val = CurDAG->getTargetConstant(ValConst, + MVT::i32); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode())) + if (isInt<9>(CN->getSExtValue())) { + SDNode* Result = + CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, + MVT::i32, Mul_0, Val); + ReplaceUses(N, Result); + return Result; + } + + } + } else if (Shl_0.getOpcode() == ISD::SUB) { + SDValue Sub_0 = Shl_0.getOperand(0); // Const 0 + SDValue Sub_1 = Shl_0.getOperand(1); // Val + if (Sub_0.getOpcode() == ISD::Constant) { + int32_t SubConst = + cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue(); + if (SubConst == 0) { + if (Sub_1.getOpcode() == ISD::SHL) { + SDValue Shl2_0 = Sub_1.getOperand(0); // Val + SDValue Shl2_1 = Sub_1.getOperand(1); // Const + if (Shl2_1.getOpcode() == ISD::Constant) { + int32_t ShlConst = + cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue(); + int32_t Shl2Const = + cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue(); + int32_t ValConst = 1 << (ShlConst+Shl2Const); + SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32); + if (ConstantSDNode *CN = + dyn_cast<ConstantSDNode>(Val.getNode())) + if (isInt<9>(CN->getSExtValue())) { + SDNode* Result = + CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32, + Shl2_0, Val); + ReplaceUses(N, Result); + return Result; + } + } + } + } + } + } + } + } + return SelectCode(N); +} + + +// +// If there is an zero_extend followed an intrinsic in DAG (this means - the +// result of the intrinsic is predicate); convert the zero_extend to +// transfer instruction. +// +// Zero extend -> transfer is lowered here. Otherwise, zero_extend will be +// converted into a MUX as predicate registers defined as 1 bit in the +// compiler. Architecture defines them as 8-bit registers. +// We want to preserve all the lower 8-bits and, not just 1 LSB bit. 
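+// For an i32 result a single TFR_RsPd suffices; for an i64 result the value
+// is additionally widened with COMBINE_rr(0, Rs).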
+// +SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDNode *IsIntrinsic = N->getOperand(0).getNode(); + if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) { + unsigned ID = + cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue(); + if (doesIntrinsicReturnPredicate(ID)) { + // Now we need to differentiate target data types. + if (N->getValueType(0) == MVT::i64) { + // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs). + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl, + MVT::i32, + SDValue(IsIntrinsic, 0)); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, + MVT::i32, + TargetConst0); + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + MVT::i64, MVT::Other, + SDValue(Result_2, 0), + SDValue(Result_1, 0)); + ReplaceUses(N, Result_3); + return Result_3; + } + if (N->getValueType(0) == MVT::i32) { + // Convert the zero_extend to Rs = Pd + SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl, + MVT::i32, + SDValue(IsIntrinsic, 0)); + ReplaceUses(N, RsPd); + return RsPd; + } + assert(0 && "Unexpected value type"); + } + } + return SelectCode(N); +} + + +// +// Checking for intrinsics which have predicate registers as operand(s) +// and lowering to the actual intrinsic. +// +SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID); + + // We are concerned with only those intrinsics that have predicate registers + // as at least one of the operands. + if (IntrinsicWithPred) { + SmallVector<SDValue, 8> Ops; + const MCInstrDesc &MCID = TII->get(IntrinsicWithPred); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + // Iterate over all the operands of the intrinsics. + // For PredRegs, do the transfer. + // For Double/Int Regs, just preserve the value + // For immediates, lower it. + for (unsigned i = 1; i < N->getNumOperands(); ++i) { + SDNode *Arg = N->getOperand(i).getNode(); + const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); + + if (RC == Hexagon::IntRegsRegisterClass || + RC == Hexagon::DoubleRegsRegisterClass) { + Ops.push_back(SDValue(Arg, 0)); + } else if (RC == Hexagon::PredRegsRegisterClass) { + // Do the transfer. + SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1, + SDValue(Arg, 0)); + Ops.push_back(SDValue(PdRs,0)); + } else if (RC == NULL && (dyn_cast<ConstantSDNode>(Arg) != NULL)) { + // This is immediate operand. Lower it here making sure that we DO have + // const SDNode for immediate value. + int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue(); + SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32); + Ops.push_back(SDVal); + } else { + assert(0 && "Unimplemented"); + } + } + EVT ReturnValueVT = N->getValueType(0); + SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl, + ReturnValueVT, + Ops.data(), Ops.size()); + ReplaceUses(N, Result); + return Result; + } + return SelectCode(N); +} + + +// +// Map predicate true (encoded as -1 in LLVM) to a XOR. 
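+// The i1 constant is materialized as Pd = TFR_PdRs(TFRI #0) followed by
+// xor(Pd, not(Pd)), which evaluates to all ones.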
+// +SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0) == MVT::i1) { + SDNode* Result; + int32_t Val = cast<ConstantSDNode>(N)->getSExtValue(); + if (Val == -1) { + unsigned NewIntReg = TM.getInstrInfo()->createVR(MF, MVT(MVT::i32)); + SDValue Reg = CurDAG->getRegister(NewIntReg, MVT::i32); + + // Create the IntReg = 1 node. + SDNode* IntRegTFR = + CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + CurDAG->getTargetConstant(0, MVT::i32)); + + // Pd = IntReg + SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1, + SDValue(IntRegTFR, 0)); + + // not(Pd) + SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_pp, dl, MVT::i1, + SDValue(Pd, 0)); + + // xor(not(Pd)) + Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1, + SDValue(Pd, 0), SDValue(NotPd, 0)); + + // We have just built: + // Rs = Pd + // Pd = xor(not(Pd), Pd) + + ReplaceUses(N, Result); + return Result; + } + } + + return SelectCode(N); +} + + +// +// Map add followed by a asr -> asr +=. +// +SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + // Identify nodes of the form: add(asr(...)). + SDNode* Src1 = N->getOperand(0).getNode(); + if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse() + || Src1->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that + // Rd and Rd' are assigned to the same register + SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_rr_acc, dl, MVT::i32, + N->getOperand(1), + Src1->getOperand(0), + Src1->getOperand(1)); + ReplaceUses(N, Result); + + return Result; +} + + +SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { + if (N->isMachineOpcode()) + return NULL; // Already selected. + + + switch (N->getOpcode()) { + case ISD::Constant: + return SelectConstant(N); + + case ISD::ADD: + return SelectAdd(N); + + case ISD::SHL: + return SelectSHL(N); + + case ISD::LOAD: + return SelectLoad(N); + + case ISD::STORE: + return SelectStore(N); + + case ISD::SELECT: + return SelectSelect(N); + + case ISD::TRUNCATE: + return SelectTruncate(N); + + case ISD::MUL: + return SelectMul(N); + + case ISD::ZERO_EXTEND: + return SelectZeroExtend(N); + + case ISD::INTRINSIC_WO_CHAIN: + return SelectIntrinsicWOChain(N); + } + + return SelectCode(N); +} + + +// +// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way +// to define these instructions. +// +bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. 
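The SelectAdd combine above keys on an add whose first operand is a single-use arithmetic shift right, so the shift can be folded into Hexagon's accumulating form. A minimal standalone sketch of a source-level shape that produces that DAG, with the expected mnemonic only as an illustrative comment:

// Standalone sketch, not part of the patch: a plain C++ function whose DAG
// contains add(x, sra(y, z)), the shape SelectAdd above rewrites to
// ASR_rr_acc. The mnemonic in the comment is illustrative.
#include <cstdio>

int accumulateShifted(int Acc, int Val, int Amt) {
  return Acc + (Val >> Amt);   // illustrative lowering: Rx += asr(Rs, Rt)
}

int main() {
  std::printf("%d\n", accumulateShifted(100, 64, 3));  // prints 108
  return 0;
}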
+ + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_0_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_0_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_1_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_1_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_2_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_2_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_0_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_0_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_1_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_1_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. 
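Each SelectADDRri* variant above differs only in the width and scale of the offset it accepts (s11_0 through s11_3, u6_0 through u6_2). The IsS11_*/IsU6_* helpers are defined earlier in this file; the sketch below assumes sN_K/uN_K means an N-bit signed/unsigned field whose value must be a multiple of 2^K.

// Standalone sketch, not part of the patch: a scaled immediate range check
// of the kind the IsS11_*/IsU6_* predicates perform. The template
// parameters and exact semantics here are assumptions for illustration.
#include <cassert>
#include <cstdint>

template <unsigned Bits, unsigned Scale, bool Signed>
bool fitsScaledImm(int64_t V) {
  const int64_t Unit = 1LL << Scale;
  if (V % Unit != 0)                    // offset must be a multiple of 2^Scale
    return false;
  V /= Unit;
  if (Signed)
    return V >= -(1LL << (Bits - 1)) && V < (1LL << (Bits - 1));
  return V >= 0 && V < (1LL << Bits);
}

int main() {
  assert((fitsScaledImm<11, 2, true>(4092)));   // 1023 * 4: fits s11_2
  assert((!fitsScaledImm<11, 2, true>(4094)));  // not a multiple of 4
  assert((fitsScaledImm<6, 1, false>(126)));    // 63 * 2: fits u6_1
  assert((!fitsScaledImm<6, 1, false>(-2)));    // negative in an unsigned field
  return 0;
}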
+ + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_2_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_2_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + + if (Addr.getOpcode() != ISD::ADD) { + return(SelectADDRriS11_2(Addr, Base, Offset)); + } + + return SelectADDRriS11_2(Addr, Base, Offset); +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_3_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_3_Offset(Offset.getNode())); +} + +bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, + SDValue &R2) { + if (Addr.getOpcode() == ISD::FrameIndex) return false; + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (Addr.getOpcode() == ISD::ADD) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) + if (isInt<13>(CN->getSExtValue())) + return false; // Let the reg+imm pattern catch this! + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + return true; + } + + R1 = Addr; + + return true; +} + + +// Handle generic address case. It is accessed from inlined asm =m constraints, +// which could have any kind of pointer. +bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr, + SDValue &Base, SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (Addr.getOpcode() == ISD::ADD) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + + +bool HexagonDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector<SDValue> &OutOps) { + SDValue Op0, Op1; + + switch (ConstraintCode) { + case 'o': // Offsetable. + case 'v': // Not offsetable. + default: return true; + case 'm': // Memory. + if (!SelectAddr(Op.getNode(), Op, Op0, Op1)) + return true; + break; + } + + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; +} diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp new file mode 100644 index 0000000..0ac3cf0 --- /dev/null +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -0,0 +1,1505 @@ +//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the interfaces that Hexagon uses to lower LLVM code +// into a selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "HexagonISelLowering.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonTargetObjectFile.h" +#include "HexagonSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/InlineAsm.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/Support/CommandLine.h" + +const unsigned Hexagon_MAX_RET_SIZE = 64; +using namespace llvm; + +static cl::opt<bool> +EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden, + cl::desc("Control jump table emission on Hexagon target")); + +int NumNamedVarArgParams = -1; + +// Implement calling convention for Hexagon. +static bool +CC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + // NumNamedVarArgParams can not be zero for a VarArg function. + assert ( (NumNamedVarArgParams > 0) && + "NumNamedVarArgParams is not bigger than zero."); + + if ( (int)ValNo < NumNamedVarArgParams ) { + // Deal with named arguments. + return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State); + } + + // Deal with un-named arguments. + unsigned ofst; + if (ArgFlags.isByVal()) { + // If pass-by-value, the size allocated on stack is decided + // by ArgFlags.getByValSize(), not by the size of LocVT. 
+ assert ((ArgFlags.getByValSize() > 8) && + "ByValSize must be bigger than 8 bytes"); + ofst = State.AllocateStack(ArgFlags.getByValSize(), 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i32) { + ofst = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i64) { + ofst = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + llvm_unreachable(0); + + return true; +} + + +static bool +CC_Hexagon (unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (ArgFlags.isByVal()) { + // Passed on stack. + assert ((ArgFlags.getByValSize() > 8) && + "ByValSize must be bigger than 8 bytes"); + unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + + if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (LocVT == MVT::i32) { + if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64) { + if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + return true; // CC didn't match. +} + + +static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const unsigned RegList[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5 + }; + if (unsigned Reg = State.AllocateReg(RegList, 6)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + static const unsigned RegList1[] = { + Hexagon::D1, Hexagon::D2 + }; + static const unsigned RegList2[] = { + Hexagon::R1, Hexagon::R3 + }; + if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + + if (LocVT == MVT::i1 || + LocVT == MVT::i8 || + LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (LocVT == MVT::i32) { + if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64) { + if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } 
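CC_Hexagon32 above hands the first six word-sized arguments to R0 through R5 and spills the remainder to 4-byte-aligned stack slots. A small standalone sketch of that assignment for an all-32-bit argument list; the register list and slot size come from the code above, the starting stack offset is a simplification:

// Standalone sketch, not part of the patch: where CC_Hexagon32 places a
// call with eight 32-bit arguments. Purely illustrative of the rule above.
#include <cstdio>

int main() {
  const char *Regs[] = {"R0", "R1", "R2", "R3", "R4", "R5"};
  unsigned StackOffset = 0;
  for (unsigned Arg = 0; Arg != 8; ++Arg) {
    if (Arg < 6)
      std::printf("arg%u -> %s\n", Arg, Regs[Arg]);
    else {
      std::printf("arg%u -> [SP + %u]\n", Arg, StackOffset);
      StackOffset += 4;                 // mirrors State.AllocateStack(4, 4)
    }
  }
  return 0;
}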
+ + return true; // CC didn't match. +} + +static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (LocVT == MVT::i32) { + if (unsigned Reg = State.AllocateReg(Hexagon::R0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + if (LocVT == MVT::i64) { + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +SDValue +HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) +const { + return SDValue(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. Sometimes what we are copying is the end of a +/// larger object, the part that does not fit in registers. +static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + DebugLoc dl) { + + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(), MachinePointerInfo()); +} + + +// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is +// passed by value, the function prototype is modified to return void and +// the value is stored in memory pointed by a pointer passed by caller. +SDValue +HexagonTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + // Analyze return values of ISD::RET + CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); + + SDValue StackPtr = DAG.getRegister(TM.getRegisterInfo()->getStackRegister(), + MVT::i32); + + // If this is the first return lowered for this function, add the regs to the + // liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + SDValue Ret = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together with flags. 
+ Flag = Chain.getValue(1); + } + + if (Flag.getNode()) + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain); +} + + + + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. Returns a SDNode with the same number of values as the +/// ISD::CALL. +SDValue +HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const + SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, + SDValue Callee) const { + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + Chain = DAG.getCopyFromReg(Chain, dl, + RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +/// LowerCall - Functions arguments are copied from virtual regs to +/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. +SDValue +HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + // Check for varargs. + NumNamedVarArgParams = -1; + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) + { + const Function* CalleeFn = NULL; + Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32); + if ((CalleeFn = dyn_cast<Function>(GA->getGlobal()))) + { + // If a function has zero args and is a vararg function, that's + // disallowed so it must be an undeclared function. Do not assume + // varargs if the callee is undefined. + if (CalleeFn->isVarArg() && + CalleeFn->getFunctionType()->getNumParams() != 0) { + NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams(); + } + } + } + + if (NumNamedVarArgParams > 0) + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg); + else + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon); + + + if(isTailCall) { + bool StructAttrFlag = + DAG.getMachineFunction().getFunction()->hasStructRetAttr(); + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + isVarArg, IsStructRet, + StructAttrFlag, + Outs, OutVals, Ins, DAG); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){ + CCValAssign &VA = ArgLocs[i]; + if (VA.isMemLoc()) { + isTailCall = false; + break; + } + } + if (isTailCall) { + DEBUG(dbgs () << "Eligible for Tail Call\n"); + } else { + DEBUG(dbgs () << + "Argument must be passed on stack. 
Not eligible for Tail Call\n"); + } + } + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + SDValue StackPtr = + DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(), + getPointerTy()); + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + // Loc info must be one of Full, SExt, ZExt, or AExt. + assert(0 && "Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + + if (VA.isMemLoc()) { + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); + + if (Flags.isByVal()) { + // The argument is a struct passed by value. According to LLVM, "Arg" + // is is pointer. + MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain, + Flags, DAG, dl)); + } else { + // The argument is not passed by value. "Arg" is a buildin type. It is + // not a pointer. + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(),false, false, + 0)); + } + continue; + } + + // Arguments that can be passed on register must be kept at RegsToPass + // vector. + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } + } + + // Transform all store nodes into one single node because all store + // nodes are independent of each other. + if (!MemOpChains.empty()) { + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], + MemOpChains.size()); + } + + if (!isTailCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, + getPointerTy(), true)); + + // Build a sequence of copy-to-reg nodes chained together with token + // chain and flag operands which copy the outgoing args into registers. + // The InFlag in necessary since all emited instructions must be + // stuck together. + SDValue InFlag; + if (!isTailCall) { + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + } + + // For tail calls lower the arguments to the 'real' stack slot. + if (isTailCall) { + // Force all the incoming stack arguments to be loaded from the stack + // before any new outgoing arguments are stored to the stack, because the + // outgoing stack slots may alias the incoming argument stack slots, and + // the alias isn't otherwise explicit. This is slightly more conservative + // than necessary, because it means that each store effectively depends + // on every argument instead of just those arguments it would clobber. + // + // Do not flag preceeding copytoreg stuff together with the following stuff. 
+ InFlag = SDValue(); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + InFlag =SDValue(); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (flag_aligned_memcpy) { + const char *MemcpyName = + "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; + Callee = + DAG.getTargetExternalSymbol(MemcpyName, getPointerTy()); + flag_aligned_memcpy = false; + } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); + } else if (ExternalSymbolSDNode *S = + dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + } + + // Returns a chain & a flag for retval copy to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + } + + if (InFlag.getNode()) { + Ops.push_back(InFlag); + } + + if (isTailCall) + return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + + Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, OutVals, Callee); +} + +static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD) + return false; + + if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { + isInc = (Ptr->getOpcode() == ISD::ADD); + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + // Ensure that Offset is a constant. + return (isa<ConstantSDNode>(Offset)); + } + + return false; +} + +// TODO: Put this function along with the other isS* functions in +// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the +// functions defined in HexagonImmediates.td. +static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // immS4 predicate - True if the immediate fits in a 4-bit sign extended. + // field. + int64_t v = (int64_t)N->getSExtValue(); + int64_t m = 0; + if (ShiftAmount > 0) { + m = v % ShiftAmount; + v = v >> ShiftAmount; + } + return (v <= 7) && (v >= -8) && (m == 0); +} + +/// getPostIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if this node can be +/// combined with a load / store to form a post-indexed load / store. 
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const +{ + EVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + VT = ST->getMemoryVT(); + if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) { + return false; + } + } else { + return false; + } + + bool isInc; + bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + // ShiftAmount = number of left-shifted bits in the Hexagon instruction. + int ShiftAmount = VT.getSizeInBits() / 16; + if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) { + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; + return true; + } + + return false; +} + +SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op, + SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + MachineFunction &MF = DAG.getMachineFunction(); + HexagonMachineFunctionInfo *FuncInfo = + MF.getInfo<HexagonMachineFunctionInfo>(); + switch (Node->getOpcode()) { + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the flag operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + if (FuncInfo->hasClobberLR()) + break; + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_Mem: { + for (; NumVals; --NumVals, ++i) {} + break; + } + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + unsigned Reg = + cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + + // Check it to be lr + if (Reg == TM.getRegisterInfo()->getRARegister()) { + FuncInfo->setHasClobberLR(true); + break; + } + } + break; + } + } + } + } + } // Node->getOpcode + return Op; +} + + +// +// Taken from the XCore backend. +// +SDValue HexagonTargetLowering:: +LowerBR_JT(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); + unsigned JTI = JT->getIndex(); + MachineFunction &MF = DAG.getMachineFunction(); + const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); + + // Mark all jump table targets as address taken. + const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + MBB->setHasAddressTaken(); + // This line is needed to set the hasAddressTaken flag on the BasicBlock + // object. 
+ BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock())); + } + + SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl, + getPointerTy(), TargetJT); + SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, + DAG.getConstant(2, MVT::i32)); + SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase, + ShiftIndex); + SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress, + MachinePointerInfo(), false, false, false, + 0); + return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget); +} + + +SDValue +HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + + unsigned SPReg = getStackPointerRegisterToSaveRestore(); + + // Get a reference to the stack pointer. + SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32); + + // Subtract the dynamic size from the actual stack size to + // obtain the new stack size. + SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size); + + // + // For Hexagon, the outgoing memory arguments area should be on top of the + // alloca area on the stack i.e., the outgoing memory arguments should be + // at a lower address than the alloca area. Move the alloca area down the + // stack by adding back the space reserved for outgoing arguments to SP + // here. + // + // We do not know what the size of the outgoing args is at this point. + // So, we add a pseudo instruction ADJDYNALLOC that will adjust the + // stack pointer. We patch this instruction with the correct, known + // offset in emitPrologue(). + // + // Use a placeholder immediate (zero) for now. This will be patched up + // by emitPrologue(). + SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl, + MVT::i32, + Sub, + DAG.getConstant(0, MVT::i32)); + + // The Sub result contains the new stack start address, so it + // must be placed in the stack pointer register. + SDValue CopyChain = DAG.getCopyToReg(Chain, dl, + TM.getRegisterInfo()->getStackRegister(), + Sub); + + SDValue Ops[2] = { ArgAdjust, CopyChain }; + return DAG.getMergeValues(Ops, 2, dl); +} + +SDValue +HexagonTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const + SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) +const { + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + HexagonMachineFunctionInfo *FuncInfo = + MF.getInfo<HexagonMachineFunctionInfo>(); + + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon); + + // For LLVM, in the case when returning a struct by value (>8byte), + // the first argument is a pointer that points to the location on caller's + // stack where the return value will be stored. For Hexagon, the location on + // caller's stack is passed only when the struct size is smaller than (and + // equal to) 8 bytes. If not, no address will be passed into callee and + // callee return the result direclty through R0/R1. 
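LowerReturn and LowerFormalArguments split aggregate returns at 8 bytes: a struct that fits in 8 bytes can come back in registers, while anything larger is returned through memory provided by the caller. A standalone sketch of the two shapes at the source level; the register pairing in the comments is illustrative:

// Standalone sketch, not part of the patch: the 8-byte split described in
// the comments above. 'Small' fits in 8 bytes; 'Big' does not and is
// returned through caller-provided memory (sret).
#include <cstdio>

struct Small { int A, B; };          // 8 bytes: may return in a register pair
struct Big   { int A, B, C, D; };    // 16 bytes: returned via hidden pointer

Small makeSmall() { return {1, 2}; }
Big   makeBig()   { return {1, 2, 3, 4}; }

int main() {
  Small S = makeSmall();
  Big   B = makeBig();
  std::printf("%d %d\n", S.A + S.B, B.A + B.D);
  return 0;
}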
+ + SmallVector<SDValue, 4> MemOps; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + unsigned ObjSize; + unsigned StackLocation; + int FI; + + if ( (VA.isRegLoc() && !Flags.isByVal()) + || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) { + // Arguments passed in registers + // 1. int, long long, ptr args that get allocated in register. + // 2. Large struct that gets an register to put its address in. + EVT RegVT = VA.getLocVT(); + if (RegVT == MVT::i8 || RegVT == MVT::i16 || RegVT == MVT::i32) { + unsigned VReg = + RegInfo.createVirtualRegister(Hexagon::IntRegsRegisterClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); + } else if (RegVT == MVT::i64) { + unsigned VReg = + RegInfo.createVirtualRegister(Hexagon::DoubleRegsRegisterClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); + } else { + assert (0); + } + } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) { + assert (0 && "ByValSize must be bigger than 8 bytes"); + } else { + // Sanity check. + assert(VA.isMemLoc()); + + if (Flags.isByVal()) { + // If it's a byval parameter, then we need to compute the + // "real" size, not the size of the pointer. + ObjSize = Flags.getByValSize(); + } else { + ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3; + } + + StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset(); + // Create the frame index object for this incoming parameter... + FI = MFI->CreateFixedObject(ObjSize, StackLocation, true); + + // Create the SelectionDAG nodes cordl, responding to a load + // from this parameter. + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + + if (Flags.isByVal()) { + // If it's a pass-by-value aggregate, then do not dereference the stack + // location. Instead, we should generate a reference to the stack + // location. + InVals.push_back(FIN); + } else { + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo(), false, false, + false, 0)); + } + } + } + + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], + MemOps.size()); + + if (isVarArg) { + // This will point to the next argument passed via stack. + int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize, + HEXAGON_LRFP_SIZE + + CCInfo.getNextStackOffset(), + true); + FuncInfo->setVarArgsFrameIndex(FrameIndex); + } + + return Chain; +} + +SDValue +HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + // VASTART stores the address of the VarArgsFrameIndex slot into the + // memory location argument. 
+ MachineFunction &MF = DAG.getMachineFunction(); + HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>(); + SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), Addr, + Op.getOperand(1), MachinePointerInfo(SV), false, + false, 0); +} + +SDValue +HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + SDNode* OpNode = Op.getNode(); + + SDValue Cond = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1, + Op.getOperand(2), Op.getOperand(3), + Op.getOperand(4)); + return DAG.getNode(ISD::SELECT, Op.getDebugLoc(), OpNode->getValueType(0), + Cond, Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue +HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(4, MVT::i32); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + MachinePointerInfo(), false, false, false, 0); + } + + // Return LR, which contains the return address. Mark it an implicit live-in. + unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32)); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); +} + +SDValue +HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { + const HexagonRegisterInfo *TRI = TM.getRegisterInfo(); + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + TRI->getFrameRegister(), VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), + false, false, false, 0); + return FrameAddr; +} + + +SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op, + SelectionDAG& DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); +} + + +SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG& DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); +} + + +SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, + SelectionDAG &DAG) const { + SDValue Result; + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); + DebugLoc dl = Op.getDebugLoc(); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); + + HexagonTargetObjectFile &TLOF = + (HexagonTargetObjectFile&)getObjFileLowering(); + if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { + return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); + } + + return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result); +} + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation 
+//===----------------------------------------------------------------------===// + +HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine + &targetmachine) + : TargetLowering(targetmachine, new HexagonTargetObjectFile()), + TM(targetmachine) { + + // Set up the register classes. + addRegisterClass(MVT::i32, Hexagon::IntRegsRegisterClass); + addRegisterClass(MVT::i64, Hexagon::DoubleRegsRegisterClass); + + addRegisterClass(MVT::i1, Hexagon::PredRegsRegisterClass); + + computeRegisterProperties(); + + // Align loop entry + setPrefLoopAlignment(4); + + // Limits for inline expansion of memcpy/memmove + maxStoresPerMemcpy = 6; + maxStoresPerMemmove = 6; + + // + // Library calls for unsupported operations + // + setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); + + setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf"); + setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf"); + setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf"); + setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf"); + setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf"); + setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf"); + + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi"); + setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi"); + setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti"); + + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi"); + setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi"); + setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti"); + + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf"); + setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi"); + setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti"); + setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi"); + setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti"); + + setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); + + setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3"); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3"); + setOperationAction(ISD::SREM, MVT::i32, Expand); + + setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3"); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3"); + setOperationAction(ISD::SREM, MVT::i64, Expand); + + setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3"); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + + setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3"); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + + setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3"); + setOperationAction(ISD::UREM, MVT::i32, Expand); + + setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3"); + setOperationAction(ISD::UREM, MVT::i64, Expand); + + setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3"); + setOperationAction(ISD::FDIV, MVT::f32, Expand); + + setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3"); + setOperationAction(ISD::FDIV, MVT::f64, Expand); + + setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); + + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf"); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + + setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); + setOperationAction(ISD::FADD, MVT::f64, Expand); + + setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); + 
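The constructor above routes unsupported floating-point and division operations to runtime helpers by pairing setLibcallName with an Expand action. The practical effect, sketched below, is that ordinary source-level operations become calls; the helper named in the comment is the one registered above for DIV_F64:

// Standalone sketch, not part of the patch: with ISD::FDIV marked Expand
// and RTLIB::DIV_F64 named "__hexagon_divdf3" above, a double division is
// expected to lower to a call to that helper instead of an instruction.
#include <cstdio>

double average(double Sum, double Count) {
  return Sum / Count;   // expected: call to __hexagon_divdf3 on this target
}

int main() {
  std::printf("%f\n", average(10.0, 4.0));
  return 0;
}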
setOperationAction(ISD::FADD, MVT::f32, Expand); + + setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); + setOperationAction(ISD::FADD, MVT::f32, Expand); + + setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2"); + setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); + + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi"); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand); + + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi"); + setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand); + + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf"); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + + setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2"); + setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); + + setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2"); + setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); + + setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2"); + setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); + + setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2"); + setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); + + setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2"); + setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); + + setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2"); + setCondCodeAction(ISD::SETOLT, MVT::f64, Expand); + + setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); + setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); + + setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3"); + setOperationAction(ISD::SREM, MVT::i32, Expand); + + setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + + setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); + setOperationAction(ISD::MUL, MVT::f32, Expand); + + setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2"); + setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); + + setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2"); + + + setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); + setOperationAction(ISD::SUB, MVT::f64, Expand); + + setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); + setOperationAction(ISD::SUB, MVT::f32, Expand); + + setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2"); + setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); + + setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2"); + setCondCodeAction(ISD::SETUO, MVT::f64, Expand); + + setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2"); + setCondCodeAction(ISD::SETO, MVT::f64, Expand); + + setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2"); + setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); + + setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2"); + setCondCodeAction(ISD::SETO, MVT::f32, Expand); + + setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2"); + setCondCodeAction(ISD::SETUO, MVT::f32, Expand); + + setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); + setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal); + setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); + setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal); + + setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal); + setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal); + setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); + setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal); + + setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + + // Turn FP extload into load/fextend. + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + // Hexagon has a i1 sign extending load. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand); + // Turn FP truncstore into trunc + store. 
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // Custom legalize GlobalAddress nodes into CONST32. + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i8, Custom); + // Truncate action? + setOperationAction(ISD::TRUNCATE, MVT::i64, Expand); + + // Hexagon doesn't have sext_inreg, replace them with shl/sra. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); + + // Hexagon has no REM or DIVREM operations. + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + + // Expand fp<->uint. + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + + // Hexagon has no select or setcc: expand to SELECT_CC. + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + + // Lower SELECT_CC to SETCC and SELECT. + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); + // This is a workaround documented in DAGCombiner.cpp:2892 We don't + // support SELECT_CC on every type. + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + if (EmitJumpTables) { + setOperationAction(ISD::BR_JT, MVT::Other, Custom); + } else { + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + } + + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ , MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::ROTL , MVT::i32, Expand); + setOperationAction(ISD::ROTR , MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FPOW , MVT::f64, Expand); + setOperationAction(ISD::FPOW , MVT::f32, Expand); + + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, 
MVT::i32, Expand); + + setOperationAction(ISD::EH_RETURN, MVT::Other, Expand); + + if (TM.getSubtargetImpl()->isSubtargetV2()) { + setExceptionPointerRegister(Hexagon::R20); + setExceptionSelectorRegister(Hexagon::R21); + } else { + setExceptionPointerRegister(Hexagon::R0); + setExceptionSelectorRegister(Hexagon::R1); + } + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex. + setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // Use the default implementation. + setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + + + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); + setOperationAction(ISD::INLINEASM , MVT::Other, Custom); + + setMinFunctionAlignment(2); + + // Needed for DYNAMIC_STACKALLOC expansion. + unsigned StackRegister = TM.getRegisterInfo()->getStackRegister(); + setStackPointerRegisterToSaveRestore(StackRegister); +} + + +const char* +HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; + case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; + case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; + case HexagonISD::BRICC: return "HexagonISD::BRICC"; + case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; + case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; + case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; + case HexagonISD::Hi: return "HexagonISD::Hi"; + case HexagonISD::Lo: return "HexagonISD::Lo"; + case HexagonISD::FTOI: return "HexagonISD::FTOI"; + case HexagonISD::ITOF: return "HexagonISD::ITOF"; + case HexagonISD::CALL: return "HexagonISD::CALL"; + case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; + case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; + case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + } +} + +bool +HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + EVT MTy1 = EVT::getEVT(Ty1); + EVT MTy2 = EVT::getEVT(Ty2); + if (!MTy1.isSimple() || !MTy2.isSimple()) { + return false; + } + return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32)); +} + +bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isSimple() || !VT2.isSimple()) { + return false; + } + return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32)); +} + +SDValue +HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: assert(0 && "Should not custom lower this!"); + // Frame & Return address. Currently unimplemented. 
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::GlobalTLSAddress: + assert(0 && "TLS not implemented for Hexagon."); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); + case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); + + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); + + } +} + + + +//===----------------------------------------------------------------------===// +// Hexagon Scheduler Hooks +//===----------------------------------------------------------------------===// +MachineBasicBlock * +HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) +const { + switch (MI->getOpcode()) { + case Hexagon::ADJDYNALLOC: { + MachineFunction *MF = BB->getParent(); + HexagonMachineFunctionInfo *FuncInfo = + MF->getInfo<HexagonMachineFunctionInfo>(); + FuncInfo->addAllocaAdjustInst(MI); + return BB; + } + default: + assert(false && "Unexpected instr type to insert"); + } // switch + return NULL; +} + +//===----------------------------------------------------------------------===// +// Inline Assembly Support +//===----------------------------------------------------------------------===// + +std::pair<unsigned, const TargetRegisterClass*> +HexagonTargetLowering::getRegForInlineAsmConstraint(const + std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': // R0-R31 + switch (VT.getSimpleVT().SimpleTy) { + default: + assert(0 && "getRegForInlineAsmConstraint Unhandled data type"); + case MVT::i32: + case MVT::i16: + case MVT::i8: + return std::make_pair(0U, Hexagon::IntRegsRegisterClass); + case MVT::i64: + return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass); + } + default: + assert(0 && "Unknown asm register class"); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +/// isLegalAddressingMode - Return true if the addressing mode represented by +/// AM is legal for this target, for a load/store of the specified type. +bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + // Allows a signed-extended 11-bit immediate field. + if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) { + return false; + } + + // No global is ever allowed as a base. + if (AM.BaseGV) { + return false; + } + + int Scale = AM.Scale; + if (Scale < 0) Scale = -Scale; + switch (Scale) { + case 0: // No scale reg, "r+i", "r", or just "i". + break; + default: // No scaled addressing mode. + return false; + } + return true; +} + +/// isLegalICmpImmediate - Return true if the specified immediate is legal +/// icmp immediate, that is the target has icmp instructions which can compare +/// a register against the immediate without having to materialize the +/// immediate into a register. +bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + return Imm >= -512 && Imm <= 511; +} + +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. Targets which want to do tail call +/// optimization should implement this function. 
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization( + SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + const Function *CallerF = DAG.getMachineFunction().getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + + // *************************************************************************** + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. + // *************************************************************************** + + // If this is a tail call via a function pointer, then don't do it! + if (!(dyn_cast<GlobalAddressSDNode>(Callee)) + && !(dyn_cast<ExternalSymbolSDNode>(Callee))) { + return false; + } + + // Do not optimize if the calling conventions do not match. + if (!CCMatch) + return false; + + // Do not tail call optimize vararg calls. + if (isVarArg) + return false; + + // Also avoid tail call optimization if either caller or callee uses struct + // return semantics. + if (isCalleeStructRet || isCallerStructRet) + return false; + + // In addition to the cases above, we also disable Tail Call Optimization if + // the calling convention code that at least one outgoing argument needs to + // go on the stack. We cannot check that here because at this point that + // information is not available. + return true; +} diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h new file mode 100644 index 0000000..b327615 --- /dev/null +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -0,0 +1,162 @@ +//==-- HexagonISelLowering.h - Hexagon DAG Lowering Interface ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Hexagon uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef Hexagon_ISELLOWERING_H +#define Hexagon_ISELLOWERING_H + +#include "llvm/Target/TargetLowering.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "Hexagon.h" + +namespace llvm { + namespace HexagonISD { + enum { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + CONST32, + CONST32_GP, // For marking data present in GP. + SETCC, + ADJDYNALLOC, + ARGEXTEND, + + CMPICC, // Compare two GPR operands, set icc. + CMPFCC, // Compare two FP operands, set fcc. + BRICC, // Branch to dest on icc condition + BRFCC, // Branch to dest on fcc condition + SELECT_ICC, // Select between two values using the current ICC flags. + SELECT_FCC, // Select between two values using the current FCC flags. + + Hi, Lo, // Hi/Lo operations, typically on a global address. + + FTOI, // FP to Int within a FP register. + ITOF, // Int to FP within a FP register. + + CALL, // A call instruction. + RET_FLAG, // Return with a flag operand. + BR_JT, // Jump table. + BARRIER, // Memory barrier. + WrapperJT, + TC_RETURN + }; + } + + class HexagonTargetLowering : public TargetLowering { + int VarArgsFrameOffset; // Frame offset to start of varargs area. 
+ + bool CanReturnSmallStruct(const Function* CalleeFn, + unsigned& RetSize) const; + + public: + HexagonTargetMachine &TM; + explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine); + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const + SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; + + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; + virtual bool isTruncateFree(EVT VT1, EVT VT2) const; + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + virtual const char *getTargetNodeName(unsigned Opcode) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, + SDValue Callee) const; + + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const; + + virtual MachineBasicBlock + *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const; + + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + virtual EVT getSetCCResultType(EVT VT) const { + return MVT::i1; + } + + virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + // Intrinsics + virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const; + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. 
+ /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. + /// TODO: Handle pre/postinc as well. + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const; + + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. + virtual bool isLegalICmpImmediate(int64_t Imm) const; + }; +} // end namespace llvm + +#endif // Hexagon_ISELLOWERING_H diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td new file mode 100644 index 0000000..1e3fcb8 --- /dev/null +++ b/lib/Target/Hexagon/HexagonImmediates.td @@ -0,0 +1,491 @@ +//=- HexagonImmediates.td - Hexagon immediate processing --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illnois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// From IA64's InstrInfo file +def s32Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s16Imm : Operand<i32> { + let PrintMethod = "printHexagonImmOperand"; +} + +def s12Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s11Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s11_0Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s11_1Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s11_2Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s11_3Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s10Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s8Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s9Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s8Imm64 : Operand<i64> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s6Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s4Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s4_0Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s4_1Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def s4_2Imm : Operand<i32> { + // For now, we use a generic print function for all operands. 
+ let PrintMethod = "printHexagonImmOperand"; +} + +def s4_3Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u64Imm : Operand<i64> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u32Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u16Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u16_0Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u16_1Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u16_2Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u11_3Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u10Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u9Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u8Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u7Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u6Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u6_0Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u6_1Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u6_2Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u6_3Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u5Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u4Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u3Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def u2Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def n8Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +def m6Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + +// +// Immediate predicates +// +def s32ImmPred : PatLeaf<(i32 imm), [{ + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. 
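  // Note: despite the copied "immS16" wording here and in several of the
  // predicates below, each predicate checks the width or alignment encoded in
  // its own name - isInt<32>(v) here, isInt<12>(v) for s12ImmPred,
  // isShiftedInt<11,2>(v) for s11_2ImmPred, and so on.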
+ int64_t v = (int64_t)N->getSExtValue(); + return isInt<32>(v); +}]>; + +def s32_24ImmPred : PatLeaf<(i32 imm), [{ + // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign + // extended field that is a multiple of 0x1000000. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<32,24>(v); +}]>; + +def s32_16s8ImmPred : PatLeaf<(i32 imm), [{ + // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign + // extended field that is a multiple of 0x10000. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<24,16>(v); +}]>; + +def s16ImmPred : PatLeaf<(i32 imm), [{ + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<16>(v); +}]>; + + +def s13ImmPred : PatLeaf<(i32 imm), [{ + // immS13 predicate - True if the immediate fits in a 13-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<13>(v); +}]>; + + +def s12ImmPred : PatLeaf<(i32 imm), [{ + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<12>(v); +}]>; + +def s11_0ImmPred : PatLeaf<(i32 imm), [{ + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<11>(v); +}]>; + + +def s11_1ImmPred : PatLeaf<(i32 imm), [{ + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,1>(v); +}]>; + + +def s11_2ImmPred : PatLeaf<(i32 imm), [{ + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,2>(v); +}]>; + + +def s11_3ImmPred : PatLeaf<(i32 imm), [{ + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,3>(v); +}]>; + + +def s10ImmPred : PatLeaf<(i32 imm), [{ + // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<10>(v); +}]>; + + +def s9ImmPred : PatLeaf<(i32 imm), [{ + // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<9>(v); +}]>; + + +def s8ImmPred : PatLeaf<(i32 imm), [{ + // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<8>(v); +}]>; + + +def s8Imm64Pred : PatLeaf<(i64 imm), [{ + // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<8>(v); +}]>; + + +def s6ImmPred : PatLeaf<(i32 imm), [{ + // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<6>(v); +}]>; + + +def s4_0ImmPred : PatLeaf<(i32 imm), [{ + // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<4>(v); +}]>; + + +def s4_1ImmPred : PatLeaf<(i32 imm), [{ + // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended + // field of 2. 
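  // Clarification: isShiftedInt<N,S>(v) from llvm/Support/MathExtras.h is
  // isInt<N+S>(v) && (v % (1 << S) == 0), i.e. v is 2^S times a signed N-bit
  // value.  This predicate therefore accepts even values in [-16, 14]; the
  // s4_2/s4_3 variants below accept multiples of 4 in [-32, 28] and multiples
  // of 8 in [-64, 56].  isShiftedUInt<N,S>, used by the u6_1, u6_2 and u6_3
  // predicates, is the unsigned counterpart.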
+ int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<4,1>(v); +}]>; + + +def s4_2ImmPred : PatLeaf<(i32 imm), [{ + // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended + // field that is a multiple of 4. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<4,2>(v); +}]>; + + +def s4_3ImmPred : PatLeaf<(i32 imm), [{ + // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended + // field that is a multiple of 8. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<4,3>(v); +}]>; + + +def u64ImmPred : PatLeaf<(i64 imm), [{ + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + // Adding "N ||" to supress gcc unused warning. + return (N || true); +}]>; + +def u32ImmPred : PatLeaf<(i32 imm), [{ + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); +}]>; + +def u16ImmPred : PatLeaf<(i32 imm), [{ + // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<16>(v); +}]>; + +def u16_s8ImmPred : PatLeaf<(i32 imm), [{ + // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign + // extended s8 field. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<16,8>(v); +}]>; + +def u9ImmPred : PatLeaf<(i32 imm), [{ + // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<9>(v); +}]>; + + +def u8ImmPred : PatLeaf<(i32 imm), [{ + // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<8>(v); +}]>; + +def u7ImmPred : PatLeaf<(i32 imm), [{ + // u7ImmPred predicate - True if the immediate fits in a 8-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<7>(v); +}]>; + + +def u6ImmPred : PatLeaf<(i32 imm), [{ + // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<6>(v); +}]>; + +def u6_0ImmPred : PatLeaf<(i32 imm), [{ + // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned + // field. Same as u6ImmPred. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<6>(v); +}]>; + +def u6_1ImmPred : PatLeaf<(i32 imm), [{ + // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned + // field that is 1 bit alinged - multiple of 2. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,1>(v); +}]>; + +def u6_2ImmPred : PatLeaf<(i32 imm), [{ + // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned + // field that is 2 bits alinged - multiple of 4. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,2>(v); +}]>; + +def u6_3ImmPred : PatLeaf<(i32 imm), [{ + // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned + // field that is 3 bits alinged - multiple of 8. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,3>(v); +}]>; + +def u5ImmPred : PatLeaf<(i32 imm), [{ + // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<5>(v); +}]>; + + +def u3ImmPred : PatLeaf<(i32 imm), [{ + // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned + // field. 
+ int64_t v = (int64_t)N->getSExtValue(); + return isUInt<3>(v); +}]>; + + +def u2ImmPred : PatLeaf<(i32 imm), [{ + // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<2>(v); +}]>; + + +def u1ImmPred : PatLeaf<(i1 imm), [{ + // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<1>(v); +}]>; + +def m6ImmPred : PatLeaf<(i32 imm), [{ + // m6ImmPred predicate - True if the immediate is negative and fits in + // a 6-bit negative number. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<6>(v); +}]>; + +//InN means negative integers in [-(2^N - 1), 0] +def n8ImmPred : PatLeaf<(i32 imm), [{ + // n8ImmPred predicate - True if the immediate fits in a 8-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return (-255 <= v && v <= 0); +}]>; diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td new file mode 100644 index 0000000..7e92776 --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -0,0 +1,242 @@ +//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr, + InstrItinClass itin> : Instruction { + field bits<32> Inst; + + let Namespace = "Hexagon"; + +/* Commented out for Hexagon + bits<2> op; + let Inst{31-30} = op; */ // Top two bits are the 'op' field + + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = asmstr; + let Pattern = pattern; + let Constraints = cstr; + let Itinerary = itin; +} + +//----------------------------------------------------------------------------// +// Intruction Classes Definitions + +//----------------------------------------------------------------------------// + +// LD Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", LD> { + bits<5> rd; + bits<5> rs; + bits<13> imm13; +} + +// LD Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr> + : InstHexagon<outs, ins, asmstr, pattern, cstr, LD> { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<13> imm13; +} + +// ST Instruction Class in V2/V3 can take SLOT0 only. +// ST Instruction Class in V4 can take SLOT0 & SLOT1. +// Definition of the instruction class CHANGED from V2/V3 to V4. +class STInst<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", ST> { + bits<5> rd; + bits<5> rs; + bits<13> imm13; +} + +// ST Instruction Class in V2/V3 can take SLOT0 only. +// ST Instruction Class in V4 can take SLOT0 & SLOT1. +// Definition of the instruction class CHANGED from V2/V3 to V4. +class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr> + : InstHexagon<outs, ins, asmstr, pattern, cstr, ST> { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<13> imm13; +} + +// ALU32 Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. 
+class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", ALU32> { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<16> imm16; + bits<16> imm16_2; +} + +// ALU64 Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. +class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", ALU64> { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<16> imm16; + bits<16> imm16_2; +} + +// M Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. +class MInst<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", M> { + bits<5> rd; + bits<5> rs; + bits<5> rt; +} + +// M Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. +class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr> + : InstHexagon<outs, ins, asmstr, pattern, cstr, M> { + bits<5> rd; + bits<5> rs; + bits<5> rt; +} + +// S Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. +class SInst<dag outs, dag ins, string asmstr, list<dag> pattern> +//: InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, M)> { + : InstHexagon<outs, ins, asmstr, pattern, "", S> { +// : InstHexagon<outs, ins, asmstr, pattern, "", S> { + bits<5> rd; + bits<5> rs; + bits<5> rt; +} + +// S Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. +class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr> + : InstHexagon<outs, ins, asmstr, pattern, cstr, S> { +// : InstHexagon<outs, ins, asmstr, pattern, cstr, S> { +// : InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, S)> { + bits<5> rd; + bits<5> rs; + bits<5> rt; +} + +// J Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class JType<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", J> { + bits<16> imm16; +} + +// JR Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class JRType<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", JR> { + bits<5> rs; + bits<5> pu; // Predicate register +} + +// CR Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", CR> { + bits<5> rs; + bits<10> imm10; +} + + +class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO>; + + +//----------------------------------------------------------------------------// +// Intruction Classes Definitions - +//----------------------------------------------------------------------------// + + +// +// ALU32 patterns +//. 
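// (Annotation, not part of the original patch: the _rr/_ri/_ir/_ii suffixes
// below follow the usual register/immediate operand naming - e.g. ALU32_rr
// takes two register operands, ALU32_ri a register and an immediate.  All of
// them simply specialize ALU32Type, with the immediate forms zeroing the
// unused rt register field.)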
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern> + : ALU32Type<outs, ins, asmstr, pattern> { +} + +class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern> + : ALU32Type<outs, ins, asmstr, pattern> { + let rt{0-4} = 0; +} + +class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern> + : ALU32Type<outs, ins, asmstr, pattern> { + let rt{0-4} = 0; +} + +class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern> + : ALU32Type<outs, ins, asmstr, pattern> { + let rt{0-4} = 0; +} + +// +// ALU64 patterns. +// +class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern> + : ALU64Type<outs, ins, asmstr, pattern> { +} + +// J Type Instructions. +class JInst<dag outs, dag ins, string asmstr, list<dag> pattern> + : JType<outs, ins, asmstr, pattern> { +} + +// JR type Instructions. +class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern> + : JRType<outs, ins, asmstr, pattern> { +} + + +// Post increment ST Instruction. +class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr> + : STInstPost<outs, ins, asmstr, pattern, cstr> { + let rt{0-4} = 0; +} + +// Post increment LD Instruction. +class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr> + : LDInstPost<outs, ins, asmstr, pattern, cstr> { + let rt{0-4} = 0; +} + +//===----------------------------------------------------------------------===// +// V4 Instruction Format Definitions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormatsV4.td" + +//===----------------------------------------------------------------------===// +// V4 Instruction Format Definitions + +//===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td new file mode 100644 index 0000000..bd5e449 --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -0,0 +1,46 @@ +//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V4 instruction classes in TableGen format. +// +//===----------------------------------------------------------------------===// + +// +// NV type instructions. +// +class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4> { + bits<5> rd; + bits<5> rs; + bits<13> imm13; +} + +// Definition of Post increment new value store. +class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr> + : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4> { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<13> imm13; +} + +// Post increment ST Instruction. 
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr> + : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> { + let rt{0-4} = 0; +} + +class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern> + : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4> { + bits<5> rd; + bits<5> rs; + bits<6> imm6; +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp new file mode 100644 index 0000000..69a50d7 --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -0,0 +1,1459 @@ +//=- HexagonInstrInfo.cpp - Hexagon Instruction Information -------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "HexagonRegisterInfo.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "Hexagon.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#define GET_INSTRINFO_CTOR +#include "HexagonGenInstrInfo.inc" + +#include <iostream> + + +using namespace llvm; + +/// +/// Constants for Hexagon instructions. +/// +const int Hexagon_MEMW_OFFSET_MAX = 4095; +const int Hexagon_MEMW_OFFSET_MIN = 4096; +const int Hexagon_MEMD_OFFSET_MAX = 8191; +const int Hexagon_MEMD_OFFSET_MIN = 8192; +const int Hexagon_MEMH_OFFSET_MAX = 2047; +const int Hexagon_MEMH_OFFSET_MIN = 2048; +const int Hexagon_MEMB_OFFSET_MAX = 1023; +const int Hexagon_MEMB_OFFSET_MIN = 1024; +const int Hexagon_ADDI_OFFSET_MAX = 32767; +const int Hexagon_ADDI_OFFSET_MIN = 32768; +const int Hexagon_MEMD_AUTOINC_MAX = 56; +const int Hexagon_MEMD_AUTOINC_MIN = 64; +const int Hexagon_MEMW_AUTOINC_MAX = 28; +const int Hexagon_MEMW_AUTOINC_MIN = 32; +const int Hexagon_MEMH_AUTOINC_MAX = 14; +const int Hexagon_MEMH_AUTOINC_MIN = 16; +const int Hexagon_MEMB_AUTOINC_MAX = 7; +const int Hexagon_MEMB_AUTOINC_MIN = 8; + + + +HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) + : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), + RI(ST, *this), Subtarget(ST) { +} + + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. 
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + + + switch (MI->getOpcode()) { + case Hexagon::LDriw: + case Hexagon::LDrid: + case Hexagon::LDrih: + case Hexagon::LDrib: + case Hexagon::LDriub: + if (MI->getOperand(2).isFI() && + MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + + default: + break; + } + + return 0; +} + + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + case Hexagon::STriw: + case Hexagon::STrid: + case Hexagon::STrih: + case Hexagon::STrib: + if (MI->getOperand(2).isFI() && + MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + + default: + break; + } + + return 0; +} + + +unsigned +HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const{ + + int BOpc = Hexagon::JMP; + int BccOpc = Hexagon::JMP_Pred; + + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + + int regPos = 0; + // Check if ReverseBranchCondition has asked to reverse this branch + // If we want to reverse the branch an odd number of times, we want + // JMP_PredNot. + if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { + BccOpc = Hexagon::JMP_PredNot; + regPos = 1; + } + + if (FBB == 0) { + if (Cond.empty()) { + // Due to a bug in TailMerging/CFG Optimization, we need to add a + // special case handling of a predicated jump followed by an + // unconditional jump. If not, Tail Merging and CFG Optimization go + // into an infinite loop. + MachineBasicBlock *NewTBB, *NewFBB; + SmallVector<MachineOperand, 4> Cond; + MachineInstr *Term = MBB.getFirstTerminator(); + if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, + false)) { + MachineBasicBlock *NextBB = + llvm::next(MachineFunction::iterator(&MBB)); + if (NewTBB == NextBB) { + ReverseBranchCondition(Cond); + RemoveBranch(MBB); + return InsertBranch(MBB, TBB, 0, Cond, DL); + } + } + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); + } else { + BuildMI(&MBB, DL, + get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); + } + return 1; + } + + BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); + + return 2; +} + + +bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + FBB = NULL; + + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return false; + + // A basic block may looks like this: + // + // [ insn + // EH_LABEL + // insn + // insn + // insn + // EH_LABEL + // insn ] + // + // It has two succs but does not have a terminator + // Don't know how to handle it. 
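  // (Note: returning true from AnalyzeBranch means the branch could not be
  // analyzed, so the scan below conservatively gives up as soon as it finds
  // an EH_LABEL anywhere in the block.)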
+ do { + --I; + if (I->isEHLabel()) + return true; + } while (I != MBB.begin()); + + I = MBB.end(); + --I; + + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastInst->getOpcode() == Hexagon::JMP) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (LastInst->getOpcode() == Hexagon::JMP_Pred) { + // Block ends with fall-through true condbranch. + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + if (LastInst->getOpcode() == Hexagon::JMP_PredNot) { + // Block ends with fall-through false condbranch. + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(0)); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && + isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with Hexagon::BRCOND and Hexagon:JMP, handle it. + if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) || + (SecondLastInst->getOpcode() == Hexagon::JMP_Pred)) && + LastInst->getOpcode() == Hexagon::JMP) { + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with Hexagon::JMP_PredNot and Hexagon:JMP, handle it. + if ((SecondLastInst->getOpcode() == Hexagon::JMP_PredNot) && + LastInst->getOpcode() == Hexagon::JMP) { + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(0)); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two Hexagon:JMPs, handle it. The second one is not + // executed, so remove it. + if (SecondLastInst->getOpcode() == Hexagon::JMP && + LastInst->getOpcode() == Hexagon::JMP) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + + +unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + int BOpc = Hexagon::JMP; + int BccOpc = Hexagon::JMP_Pred; + int BccOpcNot = Hexagon::JMP_PredNot; + + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc && + I->getOpcode() != BccOpcNot) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot) + return 1; + + // Remove the branch. 
+ I->eraseFromParent(); + return 2; +} + + +void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) { + // Map Pd = Ps to Pd = or(Ps, Ps). + BuildMI(MBB, I, DL, get(Hexagon::OR_pp), + DestReg).addReg(SrcReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(DestReg, SrcReg)) { + // We can have an overlap between single and double reg: r1:0 = r0. + if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) { + // r1:0 = r0 + BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } else { + // r1:0 = r1 or no overlap. + BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg, + Hexagon::subreg_loreg))).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } + return; + } + if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg); + return; + } + + assert (0 && "Unimplemented"); +} + + +void HexagonInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + + DebugLoc DL = MBB.findDebugLoc(I); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + MachineMemOperand *MMO = + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MachineMemOperand::MOStore, + MFI.getObjectSize(FI), + Align); + + if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STriw)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STrid)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STriw_pred)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else { + assert(0 && "Unimplemented"); + } +} + + +void HexagonInstrInfo::storeRegToAddr( + MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const +{ + assert(0 && "Unimplemented"); + return; +} + + +void HexagonInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(I); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + MachineMemOperand *MMO = + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), + Align); + + if (RC == 
Hexagon::IntRegsRegisterClass) { + BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (RC == Hexagon::DoubleRegsRegisterClass) { + BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (RC == Hexagon::PredRegsRegisterClass) { + BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else { + assert(0 && "Can't store this register to stack slot"); + } +} + + +void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const { + assert(0 && "Unimplemented"); +} + + +MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FI) const { + // Hexagon_TODO: Implement. + return(0); +} + + +unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { + + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *TRC; + if (VT == MVT::i1) { + TRC = Hexagon::PredRegsRegisterClass; + } else if (VT == MVT::i32) { + TRC = Hexagon::IntRegsRegisterClass; + } else if (VT == MVT::i64) { + TRC = Hexagon::DoubleRegsRegisterClass; + } else { + assert(0 && "Cannot handle this register class"); + } + + unsigned NewReg = RegInfo.createVirtualRegister(TRC); + return NewReg; +} + + +bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { + bool isPred = MI->getDesc().isPredicable(); + + if (!isPred) + return false; + + const int Opc = MI->getOpcode(); + + switch(Opc) { + case Hexagon::TFRI: + return isInt<12>(MI->getOperand(1).getImm()); + + case Hexagon::STrid: + case Hexagon::STrid_indexed: + return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); + + case Hexagon::STriw: + case Hexagon::STriw_indexed: + case Hexagon::STriw_nv_V4: + return isShiftedUInt<6,2>(MI->getOperand(1).getImm()); + + case Hexagon::STrih: + case Hexagon::STrih_indexed: + case Hexagon::STrih_nv_V4: + return isShiftedUInt<6,1>(MI->getOperand(1).getImm()); + + case Hexagon::STrib: + case Hexagon::STrib_indexed: + case Hexagon::STrib_nv_V4: + return isUInt<6>(MI->getOperand(1).getImm()); + + case Hexagon::LDrid: + case Hexagon::LDrid_indexed: + return isShiftedUInt<6,3>(MI->getOperand(2).getImm()); + + case Hexagon::LDriw: + case Hexagon::LDriw_indexed: + return isShiftedUInt<6,2>(MI->getOperand(2).getImm()); + + case Hexagon::LDrih: + case Hexagon::LDriuh: + case Hexagon::LDrih_indexed: + case Hexagon::LDriuh_indexed: + return isShiftedUInt<6,1>(MI->getOperand(2).getImm()); + + case Hexagon::LDrib: + case Hexagon::LDriub: + case Hexagon::LDrib_indexed: + case Hexagon::LDriub_indexed: + return isUInt<6>(MI->getOperand(2).getImm()); + + case Hexagon::POST_LDrid: + return isShiftedInt<4,3>(MI->getOperand(3).getImm()); + + case Hexagon::POST_LDriw: + return isShiftedInt<4,2>(MI->getOperand(3).getImm()); + + case Hexagon::POST_LDrih: + case Hexagon::POST_LDriuh: + return isShiftedInt<4,1>(MI->getOperand(3).getImm()); + + case Hexagon::POST_LDrib: + case Hexagon::POST_LDriub: + return isInt<4>(MI->getOperand(3).getImm()); + + case Hexagon::STrib_imm_V4: + case Hexagon::STrih_imm_V4: + case Hexagon::STriw_imm_V4: + return (isUInt<6>(MI->getOperand(1).getImm()) && + isInt<6>(MI->getOperand(2).getImm())); + + case Hexagon::ADD_ri: + return isInt<8>(MI->getOperand(2).getImm()); + + case Hexagon::ASLH: + case Hexagon::ASRH: + case 
Hexagon::SXTB: + case Hexagon::SXTH: + case Hexagon::ZXTB: + case Hexagon::ZXTH: + return Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; + + case Hexagon::JMPR: + return false; + return true; + + default: + return true; + } + + return true; +} + + +int HexagonInstrInfo:: +getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { + switch(Opc) { + case Hexagon::TFR: + return !invertPredicate ? Hexagon::TFR_cPt : + Hexagon::TFR_cNotPt; + case Hexagon::TFRI: + return !invertPredicate ? Hexagon::TFRI_cPt : + Hexagon::TFRI_cNotPt; + case Hexagon::JMP: + return !invertPredicate ? Hexagon::JMP_Pred : + Hexagon::JMP_PredNot; + case Hexagon::ADD_ri: + return !invertPredicate ? Hexagon::ADD_ri_cPt : + Hexagon::ADD_ri_cNotPt; + case Hexagon::ADD_rr: + return !invertPredicate ? Hexagon::ADD_rr_cPt : + Hexagon::ADD_rr_cNotPt; + case Hexagon::XOR_rr: + return !invertPredicate ? Hexagon::XOR_rr_cPt : + Hexagon::XOR_rr_cNotPt; + case Hexagon::AND_rr: + return !invertPredicate ? Hexagon::AND_rr_cPt : + Hexagon::AND_rr_cNotPt; + case Hexagon::OR_rr: + return !invertPredicate ? Hexagon::OR_rr_cPt : + Hexagon::OR_rr_cNotPt; + case Hexagon::SUB_rr: + return !invertPredicate ? Hexagon::SUB_rr_cPt : + Hexagon::SUB_rr_cNotPt; + case Hexagon::COMBINE_rr: + return !invertPredicate ? Hexagon::COMBINE_rr_cPt : + Hexagon::COMBINE_rr_cNotPt; + case Hexagon::ASLH: + return !invertPredicate ? Hexagon::ASLH_cPt_V4 : + Hexagon::ASLH_cNotPt_V4; + case Hexagon::ASRH: + return !invertPredicate ? Hexagon::ASRH_cPt_V4 : + Hexagon::ASRH_cNotPt_V4; + case Hexagon::SXTB: + return !invertPredicate ? Hexagon::SXTB_cPt_V4 : + Hexagon::SXTB_cNotPt_V4; + case Hexagon::SXTH: + return !invertPredicate ? Hexagon::SXTH_cPt_V4 : + Hexagon::SXTH_cNotPt_V4; + case Hexagon::ZXTB: + return !invertPredicate ? Hexagon::ZXTB_cPt_V4 : + Hexagon::ZXTB_cNotPt_V4; + case Hexagon::ZXTH: + return !invertPredicate ? Hexagon::ZXTH_cPt_V4 : + Hexagon::ZXTH_cNotPt_V4; + + case Hexagon::JMPR: + return !invertPredicate ? Hexagon::JMPR_cPt : + Hexagon::JMPR_cNotPt; + + // V4 indexed+scaled load. + case Hexagon::LDrid_indexed_V4: + return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 : + Hexagon::LDrid_indexed_cNotPt_V4; + case Hexagon::LDrid_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 : + Hexagon::LDrid_indexed_shl_cNotPt_V4; + case Hexagon::LDrib_indexed_V4: + return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 : + Hexagon::LDrib_indexed_cNotPt_V4; + case Hexagon::LDriub_indexed_V4: + return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 : + Hexagon::LDriub_indexed_cNotPt_V4; + case Hexagon::LDriub_ae_indexed_V4: + return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 : + Hexagon::LDriub_indexed_cNotPt_V4; + case Hexagon::LDrib_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 : + Hexagon::LDrib_indexed_shl_cNotPt_V4; + case Hexagon::LDriub_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 : + Hexagon::LDriub_indexed_shl_cNotPt_V4; + case Hexagon::LDriub_ae_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 : + Hexagon::LDriub_indexed_shl_cNotPt_V4; + case Hexagon::LDrih_indexed_V4: + return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 : + Hexagon::LDrih_indexed_cNotPt_V4; + case Hexagon::LDriuh_indexed_V4: + return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 : + Hexagon::LDriuh_indexed_cNotPt_V4; + case Hexagon::LDriuh_ae_indexed_V4: + return !invertPredicate ? 
Hexagon::LDriuh_indexed_cPt_V4 : + Hexagon::LDriuh_indexed_cNotPt_V4; + case Hexagon::LDrih_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 : + Hexagon::LDrih_indexed_shl_cNotPt_V4; + case Hexagon::LDriuh_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 : + Hexagon::LDriuh_indexed_shl_cNotPt_V4; + case Hexagon::LDriuh_ae_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 : + Hexagon::LDriuh_indexed_shl_cNotPt_V4; + case Hexagon::LDriw_indexed_V4: + return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 : + Hexagon::LDriw_indexed_cNotPt_V4; + case Hexagon::LDriw_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 : + Hexagon::LDriw_indexed_shl_cNotPt_V4; + // Byte. + case Hexagon::POST_STbri: + return !invertPredicate ? Hexagon::POST_STbri_cPt : + Hexagon::POST_STbri_cNotPt; + case Hexagon::STrib: + return !invertPredicate ? Hexagon::STrib_cPt : + Hexagon::STrib_cNotPt; + case Hexagon::STrib_indexed: + return !invertPredicate ? Hexagon::STrib_indexed_cPt : + Hexagon::STrib_indexed_cNotPt; + case Hexagon::STrib_imm_V4: + return !invertPredicate ? Hexagon::STrib_imm_cPt_V4 : + Hexagon::STrib_imm_cNotPt_V4; + case Hexagon::STrib_indexed_shl_V4: + return !invertPredicate ? Hexagon::STrib_indexed_shl_cPt_V4 : + Hexagon::STrib_indexed_shl_cNotPt_V4; + // Halfword. + case Hexagon::POST_SThri: + return !invertPredicate ? Hexagon::POST_SThri_cPt : + Hexagon::POST_SThri_cNotPt; + case Hexagon::STrih: + return !invertPredicate ? Hexagon::STrih_cPt : + Hexagon::STrih_cNotPt; + case Hexagon::STrih_indexed: + return !invertPredicate ? Hexagon::STrih_indexed_cPt : + Hexagon::STrih_indexed_cNotPt; + case Hexagon::STrih_imm_V4: + return !invertPredicate ? Hexagon::STrih_imm_cPt_V4 : + Hexagon::STrih_imm_cNotPt_V4; + case Hexagon::STrih_indexed_shl_V4: + return !invertPredicate ? Hexagon::STrih_indexed_shl_cPt_V4 : + Hexagon::STrih_indexed_shl_cNotPt_V4; + // Word. + case Hexagon::POST_STwri: + return !invertPredicate ? Hexagon::POST_STwri_cPt : + Hexagon::POST_STwri_cNotPt; + case Hexagon::STriw: + return !invertPredicate ? Hexagon::STriw_cPt : + Hexagon::STriw_cNotPt; + case Hexagon::STriw_indexed: + return !invertPredicate ? Hexagon::STriw_indexed_cPt : + Hexagon::STriw_indexed_cNotPt; + case Hexagon::STriw_indexed_shl_V4: + return !invertPredicate ? Hexagon::STriw_indexed_shl_cPt_V4 : + Hexagon::STriw_indexed_shl_cNotPt_V4; + case Hexagon::STriw_imm_V4: + return !invertPredicate ? Hexagon::STriw_imm_cPt_V4 : + Hexagon::STriw_imm_cNotPt_V4; + // Double word. + case Hexagon::POST_STdri: + return !invertPredicate ? Hexagon::POST_STdri_cPt : + Hexagon::POST_STdri_cNotPt; + case Hexagon::STrid: + return !invertPredicate ? Hexagon::STrid_cPt : + Hexagon::STrid_cNotPt; + case Hexagon::STrid_indexed: + return !invertPredicate ? Hexagon::STrid_indexed_cPt : + Hexagon::STrid_indexed_cNotPt; + case Hexagon::STrid_indexed_shl_V4: + return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 : + Hexagon::STrid_indexed_shl_cNotPt_V4; + // Load. + case Hexagon::LDrid: + return !invertPredicate ? Hexagon::LDrid_cPt : + Hexagon::LDrid_cNotPt; + case Hexagon::LDriw: + return !invertPredicate ? Hexagon::LDriw_cPt : + Hexagon::LDriw_cNotPt; + case Hexagon::LDrih: + return !invertPredicate ? Hexagon::LDrih_cPt : + Hexagon::LDrih_cNotPt; + case Hexagon::LDriuh: + return !invertPredicate ? Hexagon::LDriuh_cPt : + Hexagon::LDriuh_cNotPt; + case Hexagon::LDrib: + return !invertPredicate ? 
Hexagon::LDrib_cPt : + Hexagon::LDrib_cNotPt; + case Hexagon::LDriub: + return !invertPredicate ? Hexagon::LDriub_cPt : + Hexagon::LDriub_cNotPt; + case Hexagon::LDriubit: + return !invertPredicate ? Hexagon::LDriub_cPt : + Hexagon::LDriub_cNotPt; + // Load Indexed. + case Hexagon::LDrid_indexed: + return !invertPredicate ? Hexagon::LDrid_indexed_cPt : + Hexagon::LDrid_indexed_cNotPt; + case Hexagon::LDriw_indexed: + return !invertPredicate ? Hexagon::LDriw_indexed_cPt : + Hexagon::LDriw_indexed_cNotPt; + case Hexagon::LDrih_indexed: + return !invertPredicate ? Hexagon::LDrih_indexed_cPt : + Hexagon::LDrih_indexed_cNotPt; + case Hexagon::LDriuh_indexed: + return !invertPredicate ? Hexagon::LDriuh_indexed_cPt : + Hexagon::LDriuh_indexed_cNotPt; + case Hexagon::LDrib_indexed: + return !invertPredicate ? Hexagon::LDrib_indexed_cPt : + Hexagon::LDrib_indexed_cNotPt; + case Hexagon::LDriub_indexed: + return !invertPredicate ? Hexagon::LDriub_indexed_cPt : + Hexagon::LDriub_indexed_cNotPt; + // Post Increment Load. + case Hexagon::POST_LDrid: + return !invertPredicate ? Hexagon::POST_LDrid_cPt : + Hexagon::POST_LDrid_cNotPt; + case Hexagon::POST_LDriw: + return !invertPredicate ? Hexagon::POST_LDriw_cPt : + Hexagon::POST_LDriw_cNotPt; + case Hexagon::POST_LDrih: + return !invertPredicate ? Hexagon::POST_LDrih_cPt : + Hexagon::POST_LDrih_cNotPt; + case Hexagon::POST_LDriuh: + return !invertPredicate ? Hexagon::POST_LDriuh_cPt : + Hexagon::POST_LDriuh_cNotPt; + case Hexagon::POST_LDrib: + return !invertPredicate ? Hexagon::POST_LDrib_cPt : + Hexagon::POST_LDrib_cNotPt; + case Hexagon::POST_LDriub: + return !invertPredicate ? Hexagon::POST_LDriub_cPt : + Hexagon::POST_LDriub_cNotPt; + // DEALLOC_RETURN. + case Hexagon::DEALLOC_RET_V4: + return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 : + Hexagon::DEALLOC_RET_cNotPt_V4; + default: + assert(false && "Unexpected predicable instruction"); + } +} + + +bool HexagonInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Cond) const { + int Opc = MI->getOpcode(); + assert (isPredicable(MI) && "Expected predicable instruction"); + bool invertJump = (!Cond.empty() && Cond[0].isImm() && + (Cond[0].getImm() == 0)); + MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump))); + // + // This assumes that the predicate is always the first operand + // in the set of inputs. + // + MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); + int oper; + for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) { + MachineOperand MO = MI->getOperand(oper); + if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) { + break; + } + + if (MO.isReg()) { + MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(), + MO.isImplicit(), MO.isKill(), + MO.isDead(), MO.isUndef(), + MO.isDebug()); + } else if (MO.isImm()) { + MI->getOperand(oper+1).ChangeToImmediate(MO.getImm()); + } else { + assert(false && "Unexpected operand type"); + } + } + + int regPos = invertJump ? 
1 : 0; + MachineOperand PredMO = Cond[regPos]; + MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(), + PredMO.isImplicit(), PredMO.isKill(), + PredMO.isDead(), PredMO.isUndef(), + PredMO.isDebug()); + + return true; +} + + +bool +HexagonInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCyles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const { + return true; +} + + +bool +HexagonInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, + unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, + unsigned ExtraFCycles, + const BranchProbability &Probability) const { + return true; +} + + +bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::TFR_cPt: + case Hexagon::TFR_cNotPt: + case Hexagon::TFRI_cPt: + case Hexagon::TFRI_cNotPt: + case Hexagon::TFR_cdnPt: + case Hexagon::TFR_cdnNotPt: + case Hexagon::TFRI_cdnPt: + case Hexagon::TFRI_cdnNotPt: + return true; + + case Hexagon::JMP_Pred: + case Hexagon::JMP_PredNot: + case Hexagon::BRCOND: + case Hexagon::JMP_PredPt: + case Hexagon::JMP_PredNotPt: + case Hexagon::JMP_PredPnt: + case Hexagon::JMP_PredNotPnt: + return true; + + case Hexagon::LDrid_indexed_cPt_V4 : + case Hexagon::LDrid_indexed_cdnPt_V4 : + case Hexagon::LDrid_indexed_cNotPt_V4 : + case Hexagon::LDrid_indexed_cdnNotPt_V4 : + case Hexagon::LDrid_indexed_shl_cPt_V4 : + case Hexagon::LDrid_indexed_shl_cdnPt_V4 : + case Hexagon::LDrid_indexed_shl_cNotPt_V4 : + case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDrib_indexed_cPt_V4 : + case Hexagon::LDrib_indexed_cdnPt_V4 : + case Hexagon::LDrib_indexed_cNotPt_V4 : + case Hexagon::LDrib_indexed_cdnNotPt_V4 : + case Hexagon::LDrib_indexed_shl_cPt_V4 : + case Hexagon::LDrib_indexed_shl_cdnPt_V4 : + case Hexagon::LDrib_indexed_shl_cNotPt_V4 : + case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDriub_indexed_cPt_V4 : + case Hexagon::LDriub_indexed_cdnPt_V4 : + case Hexagon::LDriub_indexed_cNotPt_V4 : + case Hexagon::LDriub_indexed_cdnNotPt_V4 : + case Hexagon::LDriub_indexed_shl_cPt_V4 : + case Hexagon::LDriub_indexed_shl_cdnPt_V4 : + case Hexagon::LDriub_indexed_shl_cNotPt_V4 : + case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDrih_indexed_cPt_V4 : + case Hexagon::LDrih_indexed_cdnPt_V4 : + case Hexagon::LDrih_indexed_cNotPt_V4 : + case Hexagon::LDrih_indexed_cdnNotPt_V4 : + case Hexagon::LDrih_indexed_shl_cPt_V4 : + case Hexagon::LDrih_indexed_shl_cdnPt_V4 : + case Hexagon::LDrih_indexed_shl_cNotPt_V4 : + case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDriuh_indexed_cPt_V4 : + case Hexagon::LDriuh_indexed_cdnPt_V4 : + case Hexagon::LDriuh_indexed_cNotPt_V4 : + case Hexagon::LDriuh_indexed_cdnNotPt_V4 : + case Hexagon::LDriuh_indexed_shl_cPt_V4 : + case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : + case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : + case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDriw_indexed_cPt_V4 : + case Hexagon::LDriw_indexed_cdnPt_V4 : + case Hexagon::LDriw_indexed_cNotPt_V4 : + case Hexagon::LDriw_indexed_cdnNotPt_V4 : + case Hexagon::LDriw_indexed_shl_cPt_V4 : + case Hexagon::LDriw_indexed_shl_cdnPt_V4 : + case Hexagon::LDriw_indexed_shl_cNotPt_V4 : + case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : + return true; + + case Hexagon::LDrid_cPt : + case Hexagon::LDrid_cNotPt : + case Hexagon::LDrid_indexed_cPt : + case Hexagon::LDrid_indexed_cNotPt : + case Hexagon::POST_LDrid_cPt : + case 
Hexagon::POST_LDrid_cNotPt : + case Hexagon::LDriw_cPt : + case Hexagon::LDriw_cNotPt : + case Hexagon::LDriw_indexed_cPt : + case Hexagon::LDriw_indexed_cNotPt : + case Hexagon::POST_LDriw_cPt : + case Hexagon::POST_LDriw_cNotPt : + case Hexagon::LDrih_cPt : + case Hexagon::LDrih_cNotPt : + case Hexagon::LDrih_indexed_cPt : + case Hexagon::LDrih_indexed_cNotPt : + case Hexagon::POST_LDrih_cPt : + case Hexagon::POST_LDrih_cNotPt : + case Hexagon::LDrib_cPt : + case Hexagon::LDrib_cNotPt : + case Hexagon::LDrib_indexed_cPt : + case Hexagon::LDrib_indexed_cNotPt : + case Hexagon::POST_LDrib_cPt : + case Hexagon::POST_LDrib_cNotPt : + case Hexagon::LDriuh_cPt : + case Hexagon::LDriuh_cNotPt : + case Hexagon::LDriuh_indexed_cPt : + case Hexagon::LDriuh_indexed_cNotPt : + case Hexagon::POST_LDriuh_cPt : + case Hexagon::POST_LDriuh_cNotPt : + case Hexagon::LDriub_cPt : + case Hexagon::LDriub_cNotPt : + case Hexagon::LDriub_indexed_cPt : + case Hexagon::LDriub_indexed_cNotPt : + case Hexagon::POST_LDriub_cPt : + case Hexagon::POST_LDriub_cNotPt : + return true; + + case Hexagon::LDrid_cdnPt : + case Hexagon::LDrid_cdnNotPt : + case Hexagon::LDrid_indexed_cdnPt : + case Hexagon::LDrid_indexed_cdnNotPt : + case Hexagon::POST_LDrid_cdnPt_V4 : + case Hexagon::POST_LDrid_cdnNotPt_V4 : + case Hexagon::LDriw_cdnPt : + case Hexagon::LDriw_cdnNotPt : + case Hexagon::LDriw_indexed_cdnPt : + case Hexagon::LDriw_indexed_cdnNotPt : + case Hexagon::POST_LDriw_cdnPt_V4 : + case Hexagon::POST_LDriw_cdnNotPt_V4 : + case Hexagon::LDrih_cdnPt : + case Hexagon::LDrih_cdnNotPt : + case Hexagon::LDrih_indexed_cdnPt : + case Hexagon::LDrih_indexed_cdnNotPt : + case Hexagon::POST_LDrih_cdnPt_V4 : + case Hexagon::POST_LDrih_cdnNotPt_V4 : + case Hexagon::LDrib_cdnPt : + case Hexagon::LDrib_cdnNotPt : + case Hexagon::LDrib_indexed_cdnPt : + case Hexagon::LDrib_indexed_cdnNotPt : + case Hexagon::POST_LDrib_cdnPt_V4 : + case Hexagon::POST_LDrib_cdnNotPt_V4 : + case Hexagon::LDriuh_cdnPt : + case Hexagon::LDriuh_cdnNotPt : + case Hexagon::LDriuh_indexed_cdnPt : + case Hexagon::LDriuh_indexed_cdnNotPt : + case Hexagon::POST_LDriuh_cdnPt_V4 : + case Hexagon::POST_LDriuh_cdnNotPt_V4 : + case Hexagon::LDriub_cdnPt : + case Hexagon::LDriub_cdnNotPt : + case Hexagon::LDriub_indexed_cdnPt : + case Hexagon::LDriub_indexed_cdnNotPt : + case Hexagon::POST_LDriub_cdnPt_V4 : + case Hexagon::POST_LDriub_cdnNotPt_V4 : + return true; + + case Hexagon::ADD_ri_cPt: + case Hexagon::ADD_ri_cNotPt: + case Hexagon::ADD_ri_cdnPt: + case Hexagon::ADD_ri_cdnNotPt: + case Hexagon::ADD_rr_cPt: + case Hexagon::ADD_rr_cNotPt: + case Hexagon::ADD_rr_cdnPt: + case Hexagon::ADD_rr_cdnNotPt: + case Hexagon::XOR_rr_cPt: + case Hexagon::XOR_rr_cNotPt: + case Hexagon::XOR_rr_cdnPt: + case Hexagon::XOR_rr_cdnNotPt: + case Hexagon::AND_rr_cPt: + case Hexagon::AND_rr_cNotPt: + case Hexagon::AND_rr_cdnPt: + case Hexagon::AND_rr_cdnNotPt: + case Hexagon::OR_rr_cPt: + case Hexagon::OR_rr_cNotPt: + case Hexagon::OR_rr_cdnPt: + case Hexagon::OR_rr_cdnNotPt: + case Hexagon::SUB_rr_cPt: + case Hexagon::SUB_rr_cNotPt: + case Hexagon::SUB_rr_cdnPt: + case Hexagon::SUB_rr_cdnNotPt: + case Hexagon::COMBINE_rr_cPt: + case Hexagon::COMBINE_rr_cNotPt: + case Hexagon::COMBINE_rr_cdnPt: + case Hexagon::COMBINE_rr_cdnNotPt: + return true; + + case Hexagon::ASLH_cPt_V4: + case Hexagon::ASLH_cNotPt_V4: + case Hexagon::ASRH_cPt_V4: + case Hexagon::ASRH_cNotPt_V4: + case Hexagon::SXTB_cPt_V4: + case Hexagon::SXTB_cNotPt_V4: + case Hexagon::SXTH_cPt_V4: + case Hexagon::SXTH_cNotPt_V4: 
+ case Hexagon::ZXTB_cPt_V4: + case Hexagon::ZXTB_cNotPt_V4: + case Hexagon::ZXTH_cPt_V4: + case Hexagon::ZXTH_cNotPt_V4: + return true; + + case Hexagon::ASLH_cdnPt_V4: + case Hexagon::ASLH_cdnNotPt_V4: + case Hexagon::ASRH_cdnPt_V4: + case Hexagon::ASRH_cdnNotPt_V4: + case Hexagon::SXTB_cdnPt_V4: + case Hexagon::SXTB_cdnNotPt_V4: + case Hexagon::SXTH_cdnPt_V4: + case Hexagon::SXTH_cdnNotPt_V4: + case Hexagon::ZXTB_cdnPt_V4: + case Hexagon::ZXTB_cdnNotPt_V4: + case Hexagon::ZXTH_cdnPt_V4: + case Hexagon::ZXTH_cdnNotPt_V4: + return true; + + default: + return false; + } +} + + +bool +HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { + MachineOperand MO = MI->getOperand(oper); + if (MO.isReg() && MO.isDef()) { + const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg()); + if (RC == Hexagon::PredRegsRegisterClass) { + Pred.push_back(MO); + return true; + } + } + } + return false; +} + + +bool +HexagonInstrInfo:: +SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + // TODO: Fix this + return false; +} + + +// +// We indicate that we want to reverse the branch by +// inserting a 0 at the beginning of the Cond vector. +// +bool HexagonInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { + Cond.erase(Cond.begin()); + } else { + Cond.insert(Cond.begin(), MachineOperand::CreateImm(0)); + } + return false; +} + + +bool HexagonInstrInfo:: +isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs, + const BranchProbability &Probability) const { + return (NumInstrs <= 4); +} + +bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::DEALLOC_RET_V4 : + case Hexagon::DEALLOC_RET_cPt_V4 : + case Hexagon::DEALLOC_RET_cNotPt_V4 : + case Hexagon::DEALLOC_RET_cdnPnt_V4 : + case Hexagon::DEALLOC_RET_cNotdnPnt_V4 : + case Hexagon::DEALLOC_RET_cdnPt_V4 : + case Hexagon::DEALLOC_RET_cNotdnPt_V4 : + return true; + } + return false; +} + + +bool HexagonInstrInfo:: +isValidOffset(const int Opcode, const int Offset) const { + // This function is to check whether the "Offset" is in the correct range of + // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is + // inserted to calculate the final address. Due to this reason, the function + // assumes that the "Offset" has correct alignment. 
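The comment above spells out the contract of isValidOffset(): offsets are assumed to be correctly aligned, and when an offset does not fit the addressing range of the given opcode the caller is expected to compute the address with an ADD_ri first. A minimal, hypothetical sketch of such a caller is below; the function name, ScratchReg, and OpNum are illustrative only and not part of this patch. The switch that follows then encodes the per-opcode ranges.

#include "HexagonInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Illustration only: fold BaseReg+Offset into the memory operand when the
// offset is in range for this opcode; otherwise compute the address with an
// ADD_ri into a scratch register and fold a zero offset instead.
static void rewriteOffset(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator II,
                          const HexagonInstrInfo &TII, unsigned BaseReg,
                          int Offset, unsigned ScratchReg, unsigned OpNum) {
  MachineInstr &MI = *II;
  if (TII.isValidOffset(MI.getOpcode(), Offset)) {
    MI.getOperand(OpNum).ChangeToRegister(BaseReg, /*isDef=*/false);
    MI.getOperand(OpNum + 1).ChangeToImmediate(Offset);
    return;
  }
  BuildMI(MBB, II, MI.getDebugLoc(), TII.get(Hexagon::ADD_ri), ScratchReg)
      .addReg(BaseReg)
      .addImm(Offset);                    // ScratchReg = add(BaseReg, #Offset)
  MI.getOperand(OpNum).ChangeToRegister(ScratchReg, /*isDef=*/false);
  MI.getOperand(OpNum + 1).ChangeToImmediate(0);
}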
+ + switch(Opcode) { + + case Hexagon::LDriw: + case Hexagon::STriw: + case Hexagon::STriwt: + assert((Offset % 4 == 0) && "Offset has incorrect alignment"); + return (Offset >= Hexagon_MEMW_OFFSET_MIN) && + (Offset <= Hexagon_MEMW_OFFSET_MAX); + + case Hexagon::LDrid: + case Hexagon::STrid: + assert((Offset % 8 == 0) && "Offset has incorrect alignment"); + return (Offset >= Hexagon_MEMD_OFFSET_MIN) && + (Offset <= Hexagon_MEMD_OFFSET_MAX); + + case Hexagon::LDrih: + case Hexagon::LDriuh: + case Hexagon::STrih: + case Hexagon::LDrih_ae: + assert((Offset % 2 == 0) && "Offset has incorrect alignment"); + return (Offset >= Hexagon_MEMH_OFFSET_MIN) && + (Offset <= Hexagon_MEMH_OFFSET_MAX); + + case Hexagon::LDrib: + case Hexagon::STrib: + case Hexagon::LDriub: + case Hexagon::LDriubit: + case Hexagon::LDrib_ae: + case Hexagon::LDriub_ae: + return (Offset >= Hexagon_MEMB_OFFSET_MIN) && + (Offset <= Hexagon_MEMB_OFFSET_MAX); + + case Hexagon::ADD_ri: + case Hexagon::TFR_FI: + return (Offset >= Hexagon_ADDI_OFFSET_MIN) && + (Offset <= Hexagon_ADDI_OFFSET_MAX); + + case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMw_ADDi_indexed_MEM_V4 : + case Hexagon::MEMw_SUBi_indexed_MEM_V4 : + case Hexagon::MEMw_ADDr_indexed_MEM_V4 : + case Hexagon::MEMw_SUBr_indexed_MEM_V4 : + case Hexagon::MEMw_ANDr_indexed_MEM_V4 : + case Hexagon::MEMw_ORr_indexed_MEM_V4 : + case Hexagon::MEMw_ADDSUBi_MEM_V4 : + case Hexagon::MEMw_ADDi_MEM_V4 : + case Hexagon::MEMw_SUBi_MEM_V4 : + case Hexagon::MEMw_ADDr_MEM_V4 : + case Hexagon::MEMw_SUBr_MEM_V4 : + case Hexagon::MEMw_ANDr_MEM_V4 : + case Hexagon::MEMw_ORr_MEM_V4 : + assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." ); + return (0 <= Offset && Offset <= 255); + + case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMh_ADDi_indexed_MEM_V4 : + case Hexagon::MEMh_SUBi_indexed_MEM_V4 : + case Hexagon::MEMh_ADDr_indexed_MEM_V4 : + case Hexagon::MEMh_SUBr_indexed_MEM_V4 : + case Hexagon::MEMh_ANDr_indexed_MEM_V4 : + case Hexagon::MEMh_ORr_indexed_MEM_V4 : + case Hexagon::MEMh_ADDSUBi_MEM_V4 : + case Hexagon::MEMh_ADDi_MEM_V4 : + case Hexagon::MEMh_SUBi_MEM_V4 : + case Hexagon::MEMh_ADDr_MEM_V4 : + case Hexagon::MEMh_SUBr_MEM_V4 : + case Hexagon::MEMh_ANDr_MEM_V4 : + case Hexagon::MEMh_ORr_MEM_V4 : + assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." ); + return (0 <= Offset && Offset <= 127); + + case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMb_ADDi_indexed_MEM_V4 : + case Hexagon::MEMb_SUBi_indexed_MEM_V4 : + case Hexagon::MEMb_ADDr_indexed_MEM_V4 : + case Hexagon::MEMb_SUBr_indexed_MEM_V4 : + case Hexagon::MEMb_ANDr_indexed_MEM_V4 : + case Hexagon::MEMb_ORr_indexed_MEM_V4 : + case Hexagon::MEMb_ADDSUBi_MEM_V4 : + case Hexagon::MEMb_ADDi_MEM_V4 : + case Hexagon::MEMb_SUBi_MEM_V4 : + case Hexagon::MEMb_ADDr_MEM_V4 : + case Hexagon::MEMb_SUBr_MEM_V4 : + case Hexagon::MEMb_ANDr_MEM_V4 : + case Hexagon::MEMb_ORr_MEM_V4 : + return (0 <= Offset && Offset <= 63); + + // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of + // any size. Later pass knows how to handle it. + case Hexagon::STriw_pred: + case Hexagon::LDriw_pred: + return true; + + // INLINEASM is very special. + case Hexagon::INLINEASM: + return true; + } + + assert(0 && "No offset range is defined for this opcode. Please define it in \ + the above switch statement!"); +} + + +// +// Check if the Offset is a valid auto-inc imm by Load/Store Type. 
+// +bool HexagonInstrInfo:: +isValidAutoIncImm(const EVT VT, const int Offset) const { + + if (VT == MVT::i64) { + return (Offset >= Hexagon_MEMD_AUTOINC_MIN && + Offset <= Hexagon_MEMD_AUTOINC_MAX && + (Offset & 0x7) == 0); + } + if (VT == MVT::i32) { + return (Offset >= Hexagon_MEMW_AUTOINC_MIN && + Offset <= Hexagon_MEMW_AUTOINC_MAX && + (Offset & 0x3) == 0); + } + if (VT == MVT::i16) { + return (Offset >= Hexagon_MEMH_AUTOINC_MIN && + Offset <= Hexagon_MEMH_AUTOINC_MAX && + (Offset & 0x1) == 0); + } + if (VT == MVT::i8) { + return (Offset >= Hexagon_MEMB_AUTOINC_MIN && + Offset <= Hexagon_MEMB_AUTOINC_MAX); + } + + assert(0 && "Not an auto-inc opc!"); + + return false; +} + + +bool HexagonInstrInfo:: +isMemOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) + { + case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMw_ADDi_indexed_MEM_V4 : + case Hexagon::MEMw_SUBi_indexed_MEM_V4 : + case Hexagon::MEMw_ADDr_indexed_MEM_V4 : + case Hexagon::MEMw_SUBr_indexed_MEM_V4 : + case Hexagon::MEMw_ANDr_indexed_MEM_V4 : + case Hexagon::MEMw_ORr_indexed_MEM_V4 : + case Hexagon::MEMw_ADDSUBi_MEM_V4 : + case Hexagon::MEMw_ADDi_MEM_V4 : + case Hexagon::MEMw_SUBi_MEM_V4 : + case Hexagon::MEMw_ADDr_MEM_V4 : + case Hexagon::MEMw_SUBr_MEM_V4 : + case Hexagon::MEMw_ANDr_MEM_V4 : + case Hexagon::MEMw_ORr_MEM_V4 : + case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMh_ADDi_indexed_MEM_V4 : + case Hexagon::MEMh_SUBi_indexed_MEM_V4 : + case Hexagon::MEMh_ADDr_indexed_MEM_V4 : + case Hexagon::MEMh_SUBr_indexed_MEM_V4 : + case Hexagon::MEMh_ANDr_indexed_MEM_V4 : + case Hexagon::MEMh_ORr_indexed_MEM_V4 : + case Hexagon::MEMh_ADDSUBi_MEM_V4 : + case Hexagon::MEMh_ADDi_MEM_V4 : + case Hexagon::MEMh_SUBi_MEM_V4 : + case Hexagon::MEMh_ADDr_MEM_V4 : + case Hexagon::MEMh_SUBr_MEM_V4 : + case Hexagon::MEMh_ANDr_MEM_V4 : + case Hexagon::MEMh_ORr_MEM_V4 : + case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMb_ADDi_indexed_MEM_V4 : + case Hexagon::MEMb_SUBi_indexed_MEM_V4 : + case Hexagon::MEMb_ADDr_indexed_MEM_V4 : + case Hexagon::MEMb_SUBr_indexed_MEM_V4 : + case Hexagon::MEMb_ANDr_indexed_MEM_V4 : + case Hexagon::MEMb_ORr_indexed_MEM_V4 : + case Hexagon::MEMb_ADDSUBi_MEM_V4 : + case Hexagon::MEMb_ADDi_MEM_V4 : + case Hexagon::MEMb_SUBi_MEM_V4 : + case Hexagon::MEMb_ADDr_MEM_V4 : + case Hexagon::MEMb_SUBr_MEM_V4 : + case Hexagon::MEMb_ANDr_MEM_V4 : + case Hexagon::MEMb_ORr_MEM_V4 : + return true; + } + return false; +} + + +bool HexagonInstrInfo:: +isSpillPredRegOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) + { + case Hexagon::STriw_pred : + case Hexagon::LDriw_pred : + return true; + } + return false; +} + + +bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { + const HexagonRegisterInfo& QRI = getRegisterInfo(); + switch (MI->getOpcode()) + { + case Hexagon::ADD_ri_cPt: + case Hexagon::ADD_ri_cNotPt: + case Hexagon::ADD_rr_cPt: + case Hexagon::ADD_rr_cNotPt: + case Hexagon::XOR_rr_cPt: + case Hexagon::XOR_rr_cNotPt: + case Hexagon::AND_rr_cPt: + case Hexagon::AND_rr_cNotPt: + case Hexagon::OR_rr_cPt: + case Hexagon::OR_rr_cNotPt: + case Hexagon::SUB_rr_cPt: + case Hexagon::SUB_rr_cNotPt: + case Hexagon::COMBINE_rr_cPt: + case Hexagon::COMBINE_rr_cNotPt: + return true; + case Hexagon::ASLH_cPt_V4: + case Hexagon::ASLH_cNotPt_V4: + case Hexagon::ASRH_cPt_V4: + case Hexagon::ASRH_cNotPt_V4: + case Hexagon::SXTB_cPt_V4: + case Hexagon::SXTB_cNotPt_V4: + case Hexagon::SXTH_cPt_V4: + case Hexagon::SXTH_cNotPt_V4: + case Hexagon::ZXTB_cPt_V4: + 
case Hexagon::ZXTB_cNotPt_V4: + case Hexagon::ZXTH_cPt_V4: + case Hexagon::ZXTH_cNotPt_V4: + return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; + + default: + return false; + } + return false; +} + + +bool HexagonInstrInfo:: +isConditionalLoad (const MachineInstr* MI) const { + const HexagonRegisterInfo& QRI = getRegisterInfo(); + switch (MI->getOpcode()) + { + case Hexagon::LDrid_cPt : + case Hexagon::LDrid_cNotPt : + case Hexagon::LDrid_indexed_cPt : + case Hexagon::LDrid_indexed_cNotPt : + case Hexagon::LDriw_cPt : + case Hexagon::LDriw_cNotPt : + case Hexagon::LDriw_indexed_cPt : + case Hexagon::LDriw_indexed_cNotPt : + case Hexagon::LDrih_cPt : + case Hexagon::LDrih_cNotPt : + case Hexagon::LDrih_indexed_cPt : + case Hexagon::LDrih_indexed_cNotPt : + case Hexagon::LDrib_cPt : + case Hexagon::LDrib_cNotPt : + case Hexagon::LDrib_indexed_cPt : + case Hexagon::LDrib_indexed_cNotPt : + case Hexagon::LDriuh_cPt : + case Hexagon::LDriuh_cNotPt : + case Hexagon::LDriuh_indexed_cPt : + case Hexagon::LDriuh_indexed_cNotPt : + case Hexagon::LDriub_cPt : + case Hexagon::LDriub_cNotPt : + case Hexagon::LDriub_indexed_cPt : + case Hexagon::LDriub_indexed_cNotPt : + return true; + case Hexagon::POST_LDrid_cPt : + case Hexagon::POST_LDrid_cNotPt : + case Hexagon::POST_LDriw_cPt : + case Hexagon::POST_LDriw_cNotPt : + case Hexagon::POST_LDrih_cPt : + case Hexagon::POST_LDrih_cNotPt : + case Hexagon::POST_LDrib_cPt : + case Hexagon::POST_LDrib_cNotPt : + case Hexagon::POST_LDriuh_cPt : + case Hexagon::POST_LDriuh_cNotPt : + case Hexagon::POST_LDriub_cPt : + case Hexagon::POST_LDriub_cNotPt : + return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; + case Hexagon::LDrid_indexed_cPt_V4 : + case Hexagon::LDrid_indexed_cNotPt_V4 : + case Hexagon::LDrid_indexed_shl_cPt_V4 : + case Hexagon::LDrid_indexed_shl_cNotPt_V4 : + case Hexagon::LDrib_indexed_cPt_V4 : + case Hexagon::LDrib_indexed_cNotPt_V4 : + case Hexagon::LDrib_indexed_shl_cPt_V4 : + case Hexagon::LDrib_indexed_shl_cNotPt_V4 : + case Hexagon::LDriub_indexed_cPt_V4 : + case Hexagon::LDriub_indexed_cNotPt_V4 : + case Hexagon::LDriub_indexed_shl_cPt_V4 : + case Hexagon::LDriub_indexed_shl_cNotPt_V4 : + case Hexagon::LDrih_indexed_cPt_V4 : + case Hexagon::LDrih_indexed_cNotPt_V4 : + case Hexagon::LDrih_indexed_shl_cPt_V4 : + case Hexagon::LDrih_indexed_shl_cNotPt_V4 : + case Hexagon::LDriuh_indexed_cPt_V4 : + case Hexagon::LDriuh_indexed_cNotPt_V4 : + case Hexagon::LDriuh_indexed_shl_cPt_V4 : + case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : + case Hexagon::LDriw_indexed_cPt_V4 : + case Hexagon::LDriw_indexed_cNotPt_V4 : + case Hexagon::LDriw_indexed_shl_cPt_V4 : + case Hexagon::LDriw_indexed_shl_cNotPt_V4 : + return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; + default: + return false; + } + return false; +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h new file mode 100644 index 0000000..d549c46 --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -0,0 +1,166 @@ +//=- HexagonInstrInfo.h - Hexagon Instruction Information ---------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HexagonINSTRUCTIONINFO_H +#define HexagonINSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "HexagonRegisterInfo.h" + + +#define GET_INSTRINFO_HEADER +#include "HexagonGenInstrInfo.inc" + +namespace llvm { + +class HexagonInstrInfo : public HexagonGenInstrInfo { + const HexagonRegisterInfo RI; + const HexagonSubtarget& Subtarget; +public: + explicit HexagonInstrInfo(HexagonSubtarget &ST); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; } + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + + virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; + + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + unsigned createVR(MachineFunction* 
MF, MVT VT) const; + + virtual bool isPredicable(MachineInstr *MI) const; + virtual bool + PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Cond) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, + const BranchProbability &Probability) const; + + virtual bool isPredicated(const MachineInstr *MI) const; + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + virtual bool + SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + virtual bool + ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + virtual bool + isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles, + const BranchProbability &Probability) const; + + bool isValidOffset(const int Opcode, const int Offset) const; + bool isValidAutoIncImm(const EVT VT, const int Offset) const; + bool isMemOp(const MachineInstr *MI) const; + bool isSpillPredRegOp(const MachineInstr *MI) const; + bool isU6_3Immediate(const int value) const; + bool isU6_2Immediate(const int value) const; + bool isU6_1Immediate(const int value) const; + bool isU6_0Immediate(const int value) const; + bool isS4_3Immediate(const int value) const; + bool isS4_2Immediate(const int value) const; + bool isS4_1Immediate(const int value) const; + bool isS4_0Immediate(const int value) const; + bool isS12_Immediate(const int value) const; + bool isU6_Immediate(const int value) const; + bool isS8_Immediate(const int value) const; + bool isS6_Immediate(const int value) const; + + bool isConditionalALU32 (const MachineInstr* MI) const; + bool isConditionalLoad (const MachineInstr* MI) const; + bool isDeallocRet(const MachineInstr *MI) const; + +private: + int getMatchingCondBranchOpcode(int Opc, bool sense) const; + +}; + +} + +#endif diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td new file mode 100644 index 0000000..cc508b7 --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -0,0 +1,3014 @@ +//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormats.td" +include "HexagonImmediates.td" + +//===----------------------------------------------------------------------===// +// Hexagon Instruction Predicate Definitions. 
+//===----------------------------------------------------------------------===// +def HasV2T : Predicate<"Subtarget.hasV2TOps()">; +def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">; +def NoV2T : Predicate<"!Subtarget.hasV2TOps()">; +def HasV3T : Predicate<"Subtarget.hasV3TOps()">; +def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">; +def NoV3T : Predicate<"!Subtarget.hasV3TOps()">; +def HasV4T : Predicate<"Subtarget.hasV4TOps()">; +def NoV4T : Predicate<"!Subtarget.hasV4TOps()">; +def UseMEMOP : Predicate<"Subtarget.useMemOps()">; + +// Addressing modes. +def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; +def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>; +def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>; +def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>; +def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>; +def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>; +def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>; +def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>; +def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>; + +// Address operands. +def MEMrr : Operand<i32> { + let PrintMethod = "printHexagonMEMrrOperand"; + let MIOperandInfo = (ops IntRegs, IntRegs); +} + +// Address operands +def MEMri : Operand<i32> { + let PrintMethod = "printHexagonMEMriOperand"; + let MIOperandInfo = (ops IntRegs, IntRegs); +} + +def MEMri_s11_2 : Operand<i32>, + ComplexPattern<i32, 2, "SelectMEMriS11_2", []> { + let PrintMethod = "printHexagonMEMriOperand"; + let MIOperandInfo = (ops IntRegs, s11Imm); +} + +def FrameIndex : Operand<i32> { + let PrintMethod = "printHexagonFrameIndexOperand"; + let MIOperandInfo = (ops IntRegs, s11Imm); +} + +let PrintMethod = "printGlobalOperand" in + def globaladdress : Operand<i32>; + +let PrintMethod = "printJumpTable" in + def jumptablebase : Operand<i32>; + +def brtarget : Operand<OtherVT>; +def calltarget : Operand<i32>; + +def bblabel : Operand<i32>; +def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">; + +def symbolHi32 : Operand<i32> { + let PrintMethod = "printSymbolHi"; +} +def symbolLo32 : Operand<i32> { + let PrintMethod = "printSymbolLo"; +} + +// Multi-class for logical operators. +multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> { + def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")), + [(set IntRegs:$dst, (OpNode s10Imm:$b, IntRegs:$c))]>; +} + +// Multi-class for compare ops. 
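Before the compare multiclasses, a note on the addressing-mode naming above: the ADDRriS11_n complex patterns and s11_nImm operands appear to denote a signed 11-bit immediate scaled by 2^n (s11_3 for doubleword accesses, s11_2 for words, and so on). Under that assumption, the encoding check looks like this self-contained sketch, included purely as a reading aid:

#include <cstdint>

// True if V is representable as (signed 11-bit immediate) << Shift, i.e. the
// low Shift bits are zero and the shifted value fits in 11 signed bits.
static bool isShiftedS11(int64_t V, unsigned Shift) {
  if (V & ((int64_t(1) << Shift) - 1))
    return false;                   // must be naturally aligned
  V >>= Shift;
  return V >= -1024 && V <= 1023;   // signed 11-bit range
}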
+let isCompare = 1 in { +multiclass CMP64_rr<string OpcStr, PatFrag OpNode> { + def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, (OpNode DoubleRegs:$b, DoubleRegs:$c))]>; +} +multiclass CMP32_rr<string OpcStr, PatFrag OpNode> { + def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; +} + +multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> { + def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, s10ImmPred:$c))]>; +} + +multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> { + def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>; +} + +multiclass CMP32_ri_u9<string OpcStr, PatFrag OpNode> { + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>; +} + +multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> { + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, s8ImmPred:$c))]>; +} +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// http://qualnet.qualcomm.com/~erich/v1/htmldocs/index.html +// http://qualnet.qualcomm.com/~erich/v2/htmldocs/index.html +// http://qualnet.qualcomm.com/~erich/v3/htmldocs/index.html +// http://qualnet.qualcomm.com/~erich/v4/htmldocs/index.html +// http://qualnet.qualcomm.com/~erich/v5/htmldocs/index.html +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU32/ALU + +//===----------------------------------------------------------------------===// +// Add. +let isPredicable = 1 in +def ADD_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = add($src1, $src2)", + [(set IntRegs:$dst, (add IntRegs:$src1, IntRegs:$src2))]>; + +let isPredicable = 1 in +def ADD_ri : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s16Imm:$src2), + "$dst = add($src1, #$src2)", + [(set IntRegs:$dst, (add IntRegs:$src1, s16ImmPred:$src2))]>; + +// Logical operations. 
+let isPredicable = 1 in +def XOR_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = xor($src1, $src2)", + [(set IntRegs:$dst, (xor IntRegs:$src1, IntRegs:$src2))]>; + +let isPredicable = 1 in +def AND_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = and($src1, $src2)", + [(set IntRegs:$dst, (and IntRegs:$src1, IntRegs:$src2))]>; + +def OR_ri : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s8Imm:$src2), + "$dst = or($src1, #$src2)", + [(set IntRegs:$dst, (or IntRegs:$src1, s8ImmPred:$src2))]>; + +def NOT_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1), + "$dst = not($src1)", + [(set IntRegs:$dst, (not IntRegs:$src1))]>; + +def AND_ri : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s10Imm:$src2), + "$dst = and($src1, #$src2)", + [(set IntRegs:$dst, (and IntRegs:$src1, s10ImmPred:$src2))]>; + +let isPredicable = 1 in +def OR_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = or($src1, $src2)", + [(set IntRegs:$dst, (or IntRegs:$src1, IntRegs:$src2))]>; + +// Negate. +def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = neg($src1)", + [(set IntRegs:$dst, (ineg IntRegs:$src1))]>; +// Nop. +let neverHasSideEffects = 1 in +def NOP : ALU32_rr<(outs), (ins), + "nop", + []>; + +// Subtract. +let isPredicable = 1 in +def SUB_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = sub($src1, $src2)", + [(set IntRegs:$dst, (sub IntRegs:$src1, IntRegs:$src2))]>; + +// Transfer immediate. +let isReMaterializable = 1, isPredicable = 1 in +def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1), + "$dst = #$src1", + [(set IntRegs:$dst, s16ImmPred:$src1)]>; + +// Transfer register. +let neverHasSideEffects = 1, isPredicable = 1 in +def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + []>; + +// Transfer control register. +let neverHasSideEffects = 1 in +def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + []>; +//===----------------------------------------------------------------------===// +// ALU32/ALU - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// + +// Combine. +let isPredicable = 1, neverHasSideEffects = 1 in +def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = combine($src1, $src2)", + []>; + +// Mux. 
+def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, + DoubleRegs:$src2, + DoubleRegs:$src3), + "$dst = vmux($src1, $src2, $src3)", + []>; + +def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2, IntRegs:$src3), + "$dst = mux($src1, $src2, $src3)", + [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2, + IntRegs:$src3), + "$dst = mux($src1, #$src2, $src3)", + [(set IntRegs:$dst, (select PredRegs:$src1, + s8ImmPred:$src2, IntRegs:$src3))]>; + +def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, + s8Imm:$src3), + "$dst = mux($src1, $src2, #$src3)", + [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2, + s8ImmPred:$src3))]>; + +def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2, + s8Imm:$src3), + "$dst = mux($src1, #$src2, #$src3)", + [(set IntRegs:$dst, (select PredRegs:$src1, s8ImmPred:$src2, + s8ImmPred:$src3))]>; + +// Shift halfword. +let isPredicable = 1 in +def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = aslh($src1)", + [(set IntRegs:$dst, (shl 16, IntRegs:$src1))]>; + +let isPredicable = 1 in +def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = asrh($src1)", + [(set IntRegs:$dst, (sra 16, IntRegs:$src1))]>; + +// Sign extend. +let isPredicable = 1 in +def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = sxtb($src1)", + [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i8))]>; + +let isPredicable = 1 in +def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = sxth($src1)", + [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i16))]>; + +// Zero extend. +let isPredicable = 1, neverHasSideEffects = 1 in +def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = zxtb($src1)", + []>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = zxth($src1)", + []>; +//===----------------------------------------------------------------------===// +// ALU32/PERM - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PRED + +//===----------------------------------------------------------------------===// + +// Conditional add. 
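Before the conditional add forms that follow, the unpredicated ALU32/PERM operations above can be restated as plain C++ (a reading aid only, not backend code; note that the architectural aslh/asrh shift the register by 16, whereas the shl/sra selection patterns above list the constant as the first operand):

#include <cstdint>

// Scalar semantics of the ALU32/PERM instructions defined above.  Arithmetic
// right shift of negative values assumes the usual host behaviour.
static int32_t mux (bool p, int32_t a, int32_t b) { return p ? a : b; }  // mux(p, a, b)
static int32_t aslh(int32_t x) { return (int32_t)((uint32_t)x << 16); }  // shift left by a halfword
static int32_t asrh(int32_t x) { return x >> 16; }                       // arithmetic shift right by a halfword
static int32_t sxtb(int32_t x) { return (int8_t)x;   }                   // sign-extend byte
static int32_t sxth(int32_t x) { return (int16_t)x;  }                   // sign-extend halfword
static int32_t zxtb(int32_t x) { return (uint8_t)x;  }                   // zero-extend byte
static int32_t zxth(int32_t x) { return (uint16_t)x; }                   // zero-extend halfword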
+let neverHasSideEffects = 1 in +def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + "if ($src1) $dst = add($src2, #$src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + "if (!$src1) $dst = add($src2, #$src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + "if ($src1.new) $dst = add($src2, #$src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + "if (!$src1.new) $dst = add($src2, #$src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = add($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = add($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = add($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = add($src2, $src3)", + []>; + + +// Conditional combine. + +let neverHasSideEffects = 1 in +def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = combine($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = combine($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = combine($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = combine($src2, $src3)", + []>; + +// Conditional logical operations. 
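The conditional logical operations below follow the same suffix convention used throughout this file and in HexagonInstrInfo.cpp above: _cPt executes when the predicate register is true, _cNotPt when it is false, and the _cdn forms test the .new value of a predicate generated in the same packet. As a reading aid, the predicated-execution model in plain C++:

#include <cstdint>

// "if ($p) $dst = add($a, $b)" - the *_cPt form.  When the predicate is
// false the destination keeps its previous contents.
static void add_cPt(bool p, int32_t &dst, int32_t a, int32_t b) {
  if (p)
    dst = a + b;
}

// The *_cNotPt form tests the inverted predicate.
static void add_cNotPt(bool p, int32_t &dst, int32_t a, int32_t b) {
  if (!p)
    dst = a + b;
}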
+ +def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = xor($src2, $src3)", + []>; + +def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = xor($src2, $src3)", + []>; + +def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = xor($src2, $src3)", + []>; + +def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = xor($src2, $src3)", + []>; + +def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = and($src2, $src3)", + []>; + +def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = and($src2, $src3)", + []>; + +def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = and($src2, $src3)", + []>; + +def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = and($src2, $src3)", + []>; + +def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = or($src2, $src3)", + []>; + +def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = or($src2, $src3)", + []>; + +def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = or($src2, $src3)", + []>; + +def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = or($src2, $src3)", + []>; + + +// Conditional subtract. + +def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = sub($src2, $src3)", + []>; + +def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = sub($src2, $src3)", + []>; + +def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = sub($src2, $src3)", + []>; + +def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = sub($src2, $src3)", + []>; + + +// Conditional transfer. 
+ +let neverHasSideEffects = 1 in +def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1) $dst = $src2", + []>; + +let neverHasSideEffects = 1 in +def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2), + "if (!$src1) $dst = $src2", + []>; + +let neverHasSideEffects = 1 in +def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2), + "if ($src1) $dst = #$src2", + []>; + +let neverHasSideEffects = 1 in +def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, + s12Imm:$src2), + "if (!$src1) $dst = #$src2", + []>; + +let neverHasSideEffects = 1 in +def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2), + "if ($src1.new) $dst = $src2", + []>; + +let neverHasSideEffects = 1 in +def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2), + "if (!$src1.new) $dst = $src2", + []>; + +let neverHasSideEffects = 1 in +def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, + s12Imm:$src2), + "if ($src1.new) $dst = #$src2", + []>; + +let neverHasSideEffects = 1 in +def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, + s12Imm:$src2), + "if (!$src1.new) $dst = #$src2", + []>; + +// Compare. +defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>; +defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>; +defm CMPLT : CMP32_rr<"cmp.lt", setlt>; +defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>; +defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>; +defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>; +//===----------------------------------------------------------------------===// +// ALU32/PRED - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU32/VH + +//===----------------------------------------------------------------------===// +// Vector add halfwords + +// Vector averagehalfwords + +// Vector subtract halfwords +//===----------------------------------------------------------------------===// +// ALU32/VH - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU64/ALU + +//===----------------------------------------------------------------------===// +// Add. +def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = add($src1, $src2)", + [(set DoubleRegs:$dst, (add DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +// Add halfword. + +// Compare. +defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>; +defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>; +defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>; + +// Logical operations. +def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = and($src1, $src2)", + [(set DoubleRegs:$dst, (and DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = or($src1, $src2)", + [(set DoubleRegs:$dst, (or DoubleRegs:$src1, DoubleRegs:$src2))]>; + +def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = xor($src1, $src2)", + [(set DoubleRegs:$dst, (xor DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +// Maximum. 
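Before the max/min definitions below, note how the defm lines above expand: each defm instantiates one of the CMP32_*/CMP64_* multiclasses defined earlier, and the resulting records are named by appending the inner def name, so defm CMPGT yields CMPGTrr and CMPGTri. A hypothetical C++ use of two generated opcodes (illustration only, not code from this patch):

#include "HexagonInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// PredReg = cmp.gt(Ra, Rb);  Rd = mux(PredReg, Rs, Rt)
static void emitCmpAndMux(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, DebugLoc DL,
                          const HexagonInstrInfo &TII, unsigned PredReg,
                          unsigned Ra, unsigned Rb, unsigned Rd,
                          unsigned Rs, unsigned Rt) {
  BuildMI(MBB, I, DL, TII.get(Hexagon::CMPGTrr), PredReg)
      .addReg(Ra).addReg(Rb);
  BuildMI(MBB, I, DL, TII.get(Hexagon::MUX_rr), Rd)
      .addReg(PredReg).addReg(Rs).addReg(Rt);
}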
+def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = max($src2, $src1)", + [(set IntRegs:$dst, (select (i1 (setlt IntRegs:$src2, + IntRegs:$src1)), + IntRegs:$src1, IntRegs:$src2))]>; + +// Minimum. +def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = min($src2, $src1)", + [(set IntRegs:$dst, (select (i1 (setgt IntRegs:$src2, + IntRegs:$src1)), + IntRegs:$src1, IntRegs:$src2))]>; + +// Subtract. +def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = sub($src1, $src2)", + [(set DoubleRegs:$dst, (sub DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +// Subtract halfword. + +// Transfer register. +let neverHasSideEffects = 1 in +def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + "$dst = $src1", + []>; +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/BIT + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/BIT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/PERM + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/VB + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/VB - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/VH + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/VW + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/VW - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// CR + +//===----------------------------------------------------------------------===// +// Logical reductions on predicates. + +// Looping instructions. + +// Pipelined looping instructions. + +// Logical operations on predicates. 
+def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), + "$dst = and($src1, $src2)", + [(set PredRegs:$dst, (and PredRegs:$src1, PredRegs:$src2))]>; + +let neverHasSideEffects = 1 in +def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, + PredRegs:$src2), + "$dst = and($src1, !$src2)", + []>; + +def NOT_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = not($src1)", + [(set PredRegs:$dst, (not PredRegs:$src1))]>; + +def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = any8($src1)", + []>; + +def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = all8($src1)", + []>; + +def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1, + PredRegs:$src2), + "$dst = vitpack($src1, $src2)", + []>; + +def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2, + PredRegs:$src3), + "$dst = valignb($src1, $src2, $src3)", + []>; + +def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2, + PredRegs:$src3), + "$dst = vspliceb($src1, $src2, $src3)", + []>; + +def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1), + "$dst = mask($src1)", + []>; + +def NOT_Ps : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = not($src1)", + [(set PredRegs:$dst, (not PredRegs:$src1))]>; + +def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), + "$dst = or($src1, $src2)", + [(set PredRegs:$dst, (or PredRegs:$src1, PredRegs:$src2))]>; + +def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), + "$dst = xor($src1, $src2)", + [(set PredRegs:$dst, (xor PredRegs:$src1, PredRegs:$src2))]>; + + +// User control register transfer. +//===----------------------------------------------------------------------===// +// CR - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// J + +//===----------------------------------------------------------------------===// +// Jump to address. +let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in { + def JMP : JInst< (outs), + (ins brtarget:$offset), + "jump $offset", + [(br bb:$offset)]>; +} + +// if (p0) jump +let isBranch = 1, isTerminator=1, Defs = [PC] in { + def JMP_Pred : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if ($src) jump $offset", + [(brcond PredRegs:$src, bb:$offset)]>; +} + +// if (!p0) jump +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredNot : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if (!$src) jump $offset", + []>; +} + +let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC] in { + def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst), + "if ($pred) jump $dst", + []>; +} + +// Jump to address conditioned on new predicate. +// if (p0) jump:t +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredPt : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if ($src.new) jump:t $offset", + []>; +} + +// if (!p0) jump:t +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredNotPt : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if (!$src.new) jump:t $offset", + []>; +} + +// Not taken. 
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredPnt : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if ($src.new) jump:nt $offset", + []>; +} + +// Not taken. +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredNotPnt : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if (!$src.new) jump:nt $offset", + []>; +} +//===----------------------------------------------------------------------===// +// J - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +// Jump to address from register. +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR: JRInst<(outs), (ins), + "jumpr r31", + [(retflag)]>; +} + +// Jump to address from register. +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1), + "if ($src1) jumpr r31", + []>; +} + +// Jump to address from register. +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1), + "if (!$src1) jumpr r31", + []>; +} + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LD + +//===----------------------------------------------------------------------===// +/// +/// Make sure that in post increment load, the first operand is always the post +/// increment operand. +/// +// Load doubleword. +let isPredicable = 1 in +def LDrid : LDInst<(outs DoubleRegs:$dst), + (ins MEMri:$addr), + "$dst = memd($addr)", + [(set DoubleRegs:$dst, (load ADDRriS11_3:$addr))]>; + +let isPredicable = 1, AddedComplexity = 20 in +def LDrid_indexed : LDInst<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, s11_3Imm:$offset), + "$dst=memd($src1+#$offset)", + [(set DoubleRegs:$dst, (load (add IntRegs:$src1, + s11_3ImmPred:$offset)))]>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_GP : LDInst<(outs DoubleRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memd(#$global+$offset)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDd_GP : LDInst<(outs DoubleRegs:$dst), + (ins globaladdress:$global), + "$dst=memd(#$global)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrid : LDInstPI<(outs DoubleRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memd($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load doubleword conditionally. 
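Before the conditional doubleword loads that follow, the post-increment note above can be restated on the host side: a post-increment load produces both the loaded value and the written-back base, which is why POST_LDrid lists two outputs and ties $src1 to $dst2. Reading aid only; legal increments are the ones isValidAutoIncImm accepts:

#include <cstdint>
#include <cstring>

// Model of POST_LDrid, "$dst = memd($src1++#$offset)": read the doubleword at
// the current base, then advance the base register by the immediate.
static int64_t post_ldrid(const uint8_t *&base, int offset) {
  int64_t value;
  std::memcpy(&value, base, sizeof(value));  // value = memd(base)
  base += offset;                            // base is written back
  return value;
}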
+let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_cPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memd($addr)", + []>; + + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_cNotPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memd($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_indexed_cPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), + "if ($src1) $dst=memd($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_indexed_cNotPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), + "if (!$src1) $dst=memd($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrid_cPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), + "if ($src1) $dst1 = memd($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrid_cNotPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), + "if (!$src1) $dst1 = memd($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_cdnPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memd($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_cdnNotPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memd($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_indexed_cdnPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), + "if ($src1.new) $dst=memd($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_indexed_cdnNotPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), + "if (!$src1.new) $dst=memd($src2+#$src3)", + []>; + + +// Load byte. +let isPredicable = 1 in +def LDrib : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memb($addr)", + [(set IntRegs:$dst, (sextloadi8 ADDRriS11_0:$addr))]>; + +def LDrib_ae : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memb($addr)", + [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>; + +// Indexed load byte. +let isPredicable = 1, AddedComplexity = 20 in +def LDrib_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memb($src1+#$offset)", + [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + + +// Indexed load byte any-extend. 
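Before the any-extending indexed form that follows, the distinction between the two byte-load flavours above is worth spelling out: the sextloadi8 pattern (LDrib) sign-extends the loaded byte into the 32-bit destination, while extloadi8 (the _ae variants) only guarantees the low 8 bits to the consumer. A host-side sketch of both, as a reading aid:

#include <cstdint>

// memb with sign extension (the sextloadi8 pattern of LDrib).
static int32_t load_byte_sext(const void *p) {
  return (int32_t)*(const int8_t *)p;
}

// memb with "any" extension (extloadi8): only the low 8 bits are meaningful;
// zero-extending is one valid choice for the upper bits.
static int32_t load_byte_anyext(const void *p) {
  return (int32_t)*(const uint8_t *)p;
}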
+let AddedComplexity = 20 in +def LDrib_ae_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memb($src1+#$offset)", + [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memb(#$global+$offset)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDb_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memb(#$global)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDub_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memub(#$global)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrib : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memb($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load byte conditionally. +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memb($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memb($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if ($src1) $dst = memb($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if (!$src1) $dst = memb($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrib_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if ($src1) $dst1 = memb($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrib_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if (!$src1) $dst1 = memb($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memb($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memb($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if ($src1.new) $dst = memb($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if (!$src1.new) $dst = memb($src2+#$src3)", + []>; + + +// Load halfword. 
+let isPredicable = 1 in +def LDrih : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memh($addr)", + [(set IntRegs:$dst, (sextloadi16 ADDRriS11_1:$addr))]>; + +let isPredicable = 1, AddedComplexity = 20 in +def LDrih_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_1Imm:$offset), + "$dst=memh($src1+#$offset)", + [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1, + s11_1ImmPred:$offset)))] >; + +def LDrih_ae : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memh($addr)", + [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>; + +let AddedComplexity = 20 in +def LDrih_ae_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_1Imm:$offset), + "$dst=memh($src1+#$offset)", + [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1, + s11_1ImmPred:$offset)))] >; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memh(#$global+$offset)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDh_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memh(#$global)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDuh_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memuh(#$global)", + []>; + + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrih : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memh($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load halfword conditionally. +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if ($src1) $dst = memh($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if (!$src1) $dst = memh($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrih_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if ($src1) $dst1 = memh($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrih_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if (!$src1) $dst1 = memh($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if ($src1.new) $dst = memh($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, 
u6_1Imm:$src3), + "if (!$src1.new) $dst = memh($src2+#$src3)", + []>; + +// Load unsigned byte. +let isPredicable = 1 in +def LDriub : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memub($addr)", + [(set IntRegs:$dst, (zextloadi8 ADDRriS11_0:$addr))]>; + +let isPredicable = 1 in +def LDriubit : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memub($addr)", + [(set IntRegs:$dst, (zextloadi1 ADDRriS11_0:$addr))]>; + +let isPredicable = 1, AddedComplexity = 20 in +def LDriub_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memub($src1+#$offset)", + [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + +let AddedComplexity = 20 in +def LDriubit_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memub($src1+#$offset)", + [(set IntRegs:$dst, (zextloadi1 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + +def LDriub_ae : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memub($addr)", + [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>; + + +let AddedComplexity = 20 in +def LDriub_ae_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memub($src1+#$offset)", + [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memub(#$global+$offset)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriub : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memub($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load unsigned byte conditionally. 
+let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memub($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memub($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if ($src1) $dst = memub($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if (!$src1) $dst = memub($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriub_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if ($src1) $dst1 = memub($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriub_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if (!$src1) $dst1 = memub($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memub($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memub($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if ($src1.new) $dst = memub($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if (!$src1.new) $dst = memub($src2+#$src3)", + []>; + +// Load unsigned halfword. +let isPredicable = 1 in +def LDriuh : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memuh($addr)", + [(set IntRegs:$dst, (zextloadi16 ADDRriS11_1:$addr))]>; + +// Indexed load unsigned halfword. +let isPredicable = 1, AddedComplexity = 20 in +def LDriuh_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_1Imm:$offset), + "$dst=memuh($src1+#$offset)", + [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1, + s11_1ImmPred:$offset)))]>; + +def LDriuh_ae : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memuh($addr)", + [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>; + + +// Indexed load unsigned halfword any-extend. +let AddedComplexity = 20 in +def LDriuh_ae_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_1Imm:$offset), + "$dst=memuh($src1+#$offset)", + [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1, + s11_1ImmPred:$offset)))] >; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memuh(#$global+$offset)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriuh : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memuh($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load unsigned halfword conditionally. 
+let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memuh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memuh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if ($src1) $dst = memuh($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if (!$src1) $dst = memuh($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriuh_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if ($src1) $dst1 = memuh($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriuh_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if (!$src1) $dst1 = memuh($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memuh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memuh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if ($src1.new) $dst = memuh($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if (!$src1.new) $dst = memuh($src2+#$src3)", + []>; + + +// Load word. +let isPredicable = 1 in +def LDriw : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), "$dst = memw($addr)", + [(set IntRegs:$dst, (load ADDRriS11_2:$addr))]>; + +// Load predicate. +let mayLoad = 1, Defs = [R10,R11] in +def LDriw_pred : LDInst<(outs PredRegs:$dst), + (ins MEMri:$addr), + "Error; should not emit", + []>; + +// Indexed load. +let isPredicable = 1, AddedComplexity = 20 in +def LDriw_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_2Imm:$offset), + "$dst=memw($src1+#$offset)", + [(set IntRegs:$dst, (load (add IntRegs:$src1, + s11_2ImmPred:$offset)))]>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memw(#$global+$offset)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDw_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memw(#$global)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memw($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load word conditionally. 
+ +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memw($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memw($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), + "if ($src1) $dst=memw($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), + "if (!$src1) $dst=memw($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriw_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), + "if ($src1) $dst1 = memw($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriw_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), + "if (!$src1) $dst1 = memw($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memw($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memw($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), + "if ($src1.new) $dst=memw($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), + "if (!$src1.new) $dst=memw($src2+#$src3)", + []>; + +// Deallocate stack frame. +let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { + def DEALLOCFRAME : LDInst<(outs), (ins i32imm:$amt1), + "deallocframe", + []>; +} + +// Load and unpack bytes to halfwords. +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/ALU + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/MPYH + +//===----------------------------------------------------------------------===// +// Multiply and use lower result. 
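+// For example, a plain 32-bit multiply such as the C expression "a * b" is
+// matched by the MPYI pattern below; the MPYI_riu/MPYI_rin/MPYI_ri forms cover
+// the cases where one operand is a small immediate.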
+// Rd=+mpyi(Rs,#u8) +def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), + "$dst =+ mpyi($src1, #$src2)", + [(set IntRegs:$dst, (mul IntRegs:$src1, u8ImmPred:$src2))]>; + +// Rd=-mpyi(Rs,#u8) +def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2), + "$dst =- mpyi($src1, #$src2)", + [(set IntRegs:$dst, + (mul IntRegs:$src1, n8ImmPred:$src2))]>; + +// Rd=mpyi(Rs,#m9) +// s9 is NOT the same as m9 - but it works.. so far. +// Assembler maps to either Rd=+mpyi(Rs,#u8 or Rd=-mpyi(Rs,#u8) +// depending on the value of m9. See Arch Spec. +def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2), + "$dst = mpyi($src1, #$src2)", + [(set IntRegs:$dst, (mul IntRegs:$src1, s9ImmPred:$src2))]>; + +// Rd=mpyi(Rs,Rt) +def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpyi($src1, $src2)", + [(set IntRegs:$dst, (mul IntRegs:$src1, IntRegs:$src2))]>; + +// Rx+=mpyi(Rs,#u8) +def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3), + "$dst += mpyi($src2, #$src3)", + [(set IntRegs:$dst, + (add (mul IntRegs:$src2, u8ImmPred:$src3), IntRegs:$src1))], + "$src1 = $dst">; + +// Rx+=mpyi(Rs,Rt) +def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst += mpyi($src2, $src3)", + [(set IntRegs:$dst, + (add (mul IntRegs:$src2, IntRegs:$src3), IntRegs:$src1))], + "$src1 = $dst">; + +// Rx-=mpyi(Rs,#u8) +def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3), + "$dst -= mpyi($src2, #$src3)", + [(set IntRegs:$dst, + (sub IntRegs:$src1, (mul IntRegs:$src2, u8ImmPred:$src3)))], + "$src1 = $dst">; + +// Multiply and use upper result. +// Rd=mpy(Rs,Rt.H):<<1:rnd:sat +// Rd=mpy(Rs,Rt.L):<<1:rnd:sat +// Rd=mpy(Rs,Rt) +def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpy($src1, $src2)", + [(set IntRegs:$dst, (mulhs IntRegs:$src1, IntRegs:$src2))]>; + +// Rd=mpy(Rs,Rt):rnd +// Rd=mpyu(Rs,Rt) +def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpyu($src1, $src2)", + [(set IntRegs:$dst, (mulhu IntRegs:$src1, IntRegs:$src2))]>; + +// Multiply and use full result. +// Rdd=mpyu(Rs,Rt) +def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpyu($src1, $src2)", + [(set DoubleRegs:$dst, (mul (i64 (anyext IntRegs:$src1)), + (i64 (anyext IntRegs:$src2))))]>; + +// Rdd=mpy(Rs,Rt) +def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpy($src1, $src2)", + [(set DoubleRegs:$dst, (mul (i64 (sext IntRegs:$src1)), + (i64 (sext IntRegs:$src2))))]>; + + +// Multiply and accumulate, use full result. 
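+// For example, a 64-bit accumulation of a widening 32x32 multiply, as in the
+// C statement "acc += (long long)a * (long long)b", corresponds to MPY64_acc
+// below; the mpyu forms are the zero-extending (unsigned) counterparts.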
+// Rxx[+-]=mpy(Rs,Rt)
+// Rxx+=mpy(Rs,Rt)
+def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (add (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3))),
+ DoubleRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rxx-=mpy(Rs,Rt)
+def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+// Rxx[+-]=mpyu(Rs,Rt)
+// Rxx+=mpyu(Rs,Rt)
+def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst, (add (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3))),
+ DoubleRegs:$src1))],"$src1 = $dst">;
+
+// Rxx-=mpyu(Rs,Rt)
+def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+
+def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += add($src2, $src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, IntRegs:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst += add($src2, #$src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, s8ImmPred:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst -= add($src2, $src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, (add IntRegs:$src2,
+ IntRegs:$src3)))],
+ "$src1 = $dst">;
+
+def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst -= add($src2, #$src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1,
+ (add IntRegs:$src2, s8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VB +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===// +/// +/// Assumptions::: ****** DO NOT IGNORE ******** +/// 1. Make sure that in post increment store, the zero'th operand is always the +/// post increment operand. +/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the +/// last operand. +/// +// Store doubleword. +let isPredicable = 1 in +def STrid : STInst<(outs), + (ins MEMri:$addr, DoubleRegs:$src1), + "memd($addr) = $src1", + [(store DoubleRegs:$src1, ADDRriS11_3:$addr)]>; + +// Indexed store double word. +let AddedComplexity = 10, isPredicable = 1 in +def STrid_indexed : STInst<(outs), + (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3), + "memd($src1+#$src2) = $src3", + [(store DoubleRegs:$src3, + (add IntRegs:$src1, s11_3ImmPred:$src2))]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrid_GP : STInst<(outs), + (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src), + "memd(#$global+$offset) = $src", + []>; + +let hasCtrlDep = 1, isPredicable = 1 in +def POST_STdri : STInstPI<(outs IntRegs:$dst), + (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset), + "memd($src2++#$offset) = $src1", + [(set IntRegs:$dst, + (post_store DoubleRegs:$src1, IntRegs:$src2, s4_3ImmPred:$offset))], + "$src2 = $dst">; + +// Store doubleword conditionally. +// if ([!]Pv) memd(Rs+#u6:3)=Rtt +// if (Pv) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_cPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2), + "if ($src1) memd($addr) = $src2", + []>; + +// if (!Pv) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_cNotPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2), + "if (!$src1) memd($addr) = $src2", + []>; + +// if (Pv) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_indexed_cPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3, + DoubleRegs:$src4), + "if ($src1) memd($src2+#$src3) = $src4", + []>; + +// if (!Pv) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_indexed_cNotPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3, + DoubleRegs:$src4), + "if (!$src1) memd($src2+#$src3) = $src4", + []>; + +// if ([!]Pv) memd(Rx++#s4:3)=Rtt +// if (Pv) memd(Rx++#s4:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, + s4_3Imm:$offset), + "if ($src1) memd($src3++#$offset) = $src2", + [], + "$src3 = $dst">; + +// if (!Pv) memd(Rx++#s4:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, + s4_3Imm:$offset), + "if (!$src1) memd($src3++#$offset) = $src2", + [], + "$src3 = $dst">; + + +// Store byte. 
+// memb(Rs+#s11:0)=Rt +let isPredicable = 1 in +def STrib : STInst<(outs), + (ins MEMri:$addr, IntRegs:$src1), + "memb($addr) = $src1", + [(truncstorei8 IntRegs:$src1, ADDRriS11_0:$addr)]>; + +let AddedComplexity = 10, isPredicable = 1 in +def STrib_indexed : STInst<(outs), + (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3), + "memb($src1+#$src2) = $src3", + [(truncstorei8 IntRegs:$src3, (add IntRegs:$src1, + s11_0ImmPred:$src2))]>; + +// memb(gp+#u16:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_GP : STInst<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memb(#$global+$offset) = $src", + []>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STb_GP : STInst<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memb(#$global) = $src", + []>; + +// memb(Rx++#s4:0)=Rt +let hasCtrlDep = 1, isPredicable = 1 in +def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1, + IntRegs:$src2, + s4Imm:$offset), + "memb($src2++#$offset) = $src1", + [(set IntRegs:$dst, + (post_truncsti8 IntRegs:$src1, IntRegs:$src2, + s4_0ImmPred:$offset))], + "$src2 = $dst">; + +// Store byte conditionally. +// if ([!]Pv) memb(Rs+#u6:0)=Rt +// if (Pv) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memb($addr) = $src2", + []>; + +// if (!Pv) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cNotPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memb($addr) = $src2", + []>; + +// if (Pv) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if ($src1) memb($src2+#$src3) = $src4", + []>; + +// if (!Pv) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cNotPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if (!$src1) memb($src2+#$src3) = $src4", + []>; + +// if ([!]Pv) memb(Rx++#s4:0)=Rt +// if (Pv) memb(Rx++#s4:0)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if ($src1) memb($src3++#$offset) = $src2", + [],"$src3 = $dst">; + +// if (!Pv) memb(Rx++#s4:0)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if (!$src1) memb($src3++#$offset) = $src2", + [],"$src3 = $dst">; + + +// Store halfword. 
+// memh(Rs+#s11:1)=Rt +let isPredicable = 1 in +def STrih : STInst<(outs), + (ins MEMri:$addr, IntRegs:$src1), + "memh($addr) = $src1", + [(truncstorei16 IntRegs:$src1, ADDRriS11_1:$addr)]>; + + +let AddedComplexity = 10, isPredicable = 1 in +def STrih_indexed : STInst<(outs), + (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3), + "memh($src1+#$src2) = $src3", + [(truncstorei16 IntRegs:$src3, (add IntRegs:$src1, + s11_1ImmPred:$src2))]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_GP : STInst<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memh(#$global+$offset) = $src", + []>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STh_GP : STInst<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memh(#$global) = $src", + []>; + +// memh(Rx++#s4:1)=Rt.H +// memh(Rx++#s4:1)=Rt +let hasCtrlDep = 1, isPredicable = 1 in +def POST_SThri : STInstPI<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), + "memh($src2++#$offset) = $src1", + [(set IntRegs:$dst, + (post_truncsti16 IntRegs:$src1, IntRegs:$src2, + s4_1ImmPred:$offset))], + "$src2 = $dst">; + +// Store halfword conditionally. +// if ([!]Pv) memh(Rs+#u6:1)=Rt +// if (Pv) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memh($addr) = $src2", + []>; + +// if (!Pv) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cNotPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memh($addr) = $src2", + []>; + +// if (Pv) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if ($src1) memh($src2+#$src3) = $src4", + []>; + +// if (!Pv) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cNotPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if (!$src1) memh($src2+#$src3) = $src4", + []>; + +// if ([!]Pv) memh(Rx++#s4:1)=Rt +// if (Pv) memh(Rx++#s4:1)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if ($src1) memh($src3++#$offset) = $src2", + [],"$src3 = $dst">; + +// if (!Pv) memh(Rx++#s4:1)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if (!$src1) memh($src3++#$offset) = $src2", + [],"$src3 = $dst">; + + +// Store word. +// Store predicate. 
+let Defs = [R10,R11] in +def STriw_pred : STInst<(outs), + (ins MEMri:$addr, PredRegs:$src1), + "Error; should not emit", + []>; + +// memw(Rs+#s11:2)=Rt +let isPredicable = 1 in +def STriw : STInst<(outs), + (ins MEMri:$addr, IntRegs:$src1), + "memw($addr) = $src1", + [(store IntRegs:$src1, ADDRriS11_2:$addr)]>; + +let AddedComplexity = 10, isPredicable = 1 in +def STriw_indexed : STInst<(outs), + (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3), + "memw($src1+#$src2) = $src3", + [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>; + +def STriwt : STInst<(outs), + (ins MEMri:$addr, DoubleRegs:$src1), + "memw($addr) = $src1", + [(truncstorei32 DoubleRegs:$src1, ADDRriS11_2:$addr)]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_GP : STInst<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memw(#$global+$offset) = $src", + []>; + +let hasCtrlDep = 1, isPredicable = 1 in +def POST_STwri : STInstPI<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), + "memw($src2++#$offset) = $src1", + [(set IntRegs:$dst, + (post_store IntRegs:$src1, IntRegs:$src2, s4_2ImmPred:$offset))], + "$src2 = $dst">; + +// Store word conditionally. +// if ([!]Pv) memw(Rs+#u6:2)=Rt +// if (Pv) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memw($addr) = $src2", + []>; + +// if (!Pv) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cNotPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memw($addr) = $src2", + []>; + +// if (Pv) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if ($src1) memw($src2+#$src3) = $src4", + []>; + +// if (!Pv) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cNotPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if (!$src1) memw($src2+#$src3) = $src4", + []>; + +// if ([!]Pv) memw(Rx++#s4:2)=Rt +// if (Pv) memw(Rx++#s4:2)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">; + +// if (!Pv) memw(Rx++#s4:2)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">; + + + +// Allocate stack frame. +let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in { + def ALLOCFRAME : STInst<(outs), + (ins i32imm:$amt), + "allocframe(#$amt)", + []>; +} +//===----------------------------------------------------------------------===// +// ST - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/ALU + +//===----------------------------------------------------------------------===// +// Logical NOT. +def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + "$dst = not($src1)", + [(set DoubleRegs:$dst, (not DoubleRegs:$src1))]>; + + +// Sign extend word to doubleword. 
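+// That is, the i32 -> i64 sign extension produced by a C cast such as
+// "(long long)x"; the pattern below selects SXTW for any sext of a 32-bit
+// register.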
+def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + "$dst = sxtw($src1)", + [(set DoubleRegs:$dst, (sext IntRegs:$src1))]>; +//===----------------------------------------------------------------------===// +// STYPE/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/BIT + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/BIT - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// STYPE/COMPLEX + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/COMPLEX - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/PERM + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/PRED + +//===----------------------------------------------------------------------===// +// Predicate transfer. +let neverHasSideEffects = 1 in +def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1), + "$dst = $src1 // Should almost never emit this", + []>; + +def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1 // Should almost never emit!", + [(set PredRegs:$dst, (trunc IntRegs:$src1))]>; +//===----------------------------------------------------------------------===// +// STYPE/PRED - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/SHIFT + +//===----------------------------------------------------------------------===// +// Shift by immediate. +def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + "$dst = asr($src1, #$src2)", + [(set IntRegs:$dst, (sra IntRegs:$src1, u5ImmPred:$src2))]>; + +def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + "$dst = asr($src1, #$src2)", + [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, u6ImmPred:$src2))]>; + +def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + "$dst = asl($src1, #$src2)", + [(set IntRegs:$dst, (shl IntRegs:$src1, u5ImmPred:$src2))]>; + +def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + "$dst = lsr($src1, #$src2)", + [(set IntRegs:$dst, (srl IntRegs:$src1, u5ImmPred:$src2))]>; + +def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + "$dst = lsr($src1, #$src2)", + [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, u6ImmPred:$src2))]>; + +def LSRd_ri_acc : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2, + u6Imm:$src3), + "$dst += lsr($src2, #$src3)", + [(set DoubleRegs:$dst, (add DoubleRegs:$src1, + (srl DoubleRegs:$src2, + u6ImmPred:$src3)))], + "$src1 = $dst">; + +// Shift by immediate and accumulate. 
+def ASR_rr_acc : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, + IntRegs:$src2, + IntRegs:$src3), + "$dst += asr($src2, $src3)", + [], "$src1 = $dst">; + +// Shift by immediate and add. +def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + u3Imm:$src3), + "$dst = addasl($src1, $src2, #$src3)", + [(set IntRegs:$dst, (add IntRegs:$src1, + (shl IntRegs:$src2, + u3ImmPred:$src3)))]>; + +// Shift by register. +def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = asl($src1, $src2)", + [(set IntRegs:$dst, (shl IntRegs:$src1, IntRegs:$src2))]>; + +def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = asr($src1, $src2)", + [(set IntRegs:$dst, (sra IntRegs:$src1, IntRegs:$src2))]>; + + +def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = lsr($src1, $src2)", + [(set IntRegs:$dst, (srl IntRegs:$src1, IntRegs:$src2))]>; + +def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + "$dst = lsl($src1, $src2)", + [(set DoubleRegs:$dst, (shl DoubleRegs:$src1, IntRegs:$src2))]>; + +def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + IntRegs:$src2), + "$dst = asr($src1, $src2)", + [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, IntRegs:$src2))]>; + +def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + IntRegs:$src2), + "$dst = lsr($src1, $src2)", + [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, IntRegs:$src2))]>; + +//===----------------------------------------------------------------------===// +// STYPE/SHIFT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VH + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VW + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VW - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/USER + +//===----------------------------------------------------------------------===// +def SDHexagonBARRIER: SDTypeProfile<0, 0, []>; +def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER, + [SDNPHasChain]>; + +let hasSideEffects = 1 in +def BARRIER : STInst<(outs), (ins), + "barrier", + [(HexagonBARRIER)]>; + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER - +//===----------------------------------------------------------------------===// + +// TFRI64 - assembly mapped. +let isReMaterializable = 1 in +def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), + "$dst = #$src1", + [(set DoubleRegs:$dst, s8Imm64Pred:$src1)]>; + +// Pseudo instruction to encode a set of conditional transfers. +// This instruction is used instead of a mux and trades-off codesize +// for performance. 
We conduct this transformation optimistically in +// the hope that these instructions get promoted to dot-new transfers. +let AddedComplexity = 100 in +def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2, + IntRegs:$src3), + "Error; should not emit", + [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +let AddedComplexity = 100 in +def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3), + "Error; should not emit", + [(set IntRegs:$dst, (select PredRegs:$src1, + s12ImmPred:$src2, + s12ImmPred:$src3))]>; + +// Generate frameindex addresses. +let isReMaterializable = 1 in +def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1), + "$dst = add($src1)", + [(set IntRegs:$dst, ADDRri:$src1)]>; + +// +// CR - Type. +// +let neverHasSideEffects = 1, Defs = [SA0, LC0] in { +def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2), + "loop0($offset, #$src2)", + []>; +} + +let neverHasSideEffects = 1, Defs = [SA0, LC0] in { +def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2), + "loop0($offset, $src2)", + []>; +} + +let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, + Defs = [PC, LC0], Uses = [SA0, LC0] in { +def ENDLOOP0 : CRInst<(outs), (ins brtarget:$offset), + ":endloop0", + []>; +} + +// Support for generating global address. +// Taken from X86InstrInfo.td. +def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; +def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; + +// This pattern is incorrect. When we add small data, we should change +// this pattern to use memw(#foo). +let isMoveImm = 1 in +def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set IntRegs:$dst, + (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set IntRegs:$dst, + (HexagonCONST32 tglobaladdr:$global))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_set_jt : LDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt), + "$dst = CONST32(#$jt)", + [(set IntRegs:$dst, + (HexagonCONST32 tjumptable:$jt))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32GP_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set IntRegs:$dst, + (HexagonCONST32_GP tglobaladdr:$global))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_Int_Real : LDInst<(outs IntRegs:$dst), (ins i32imm:$global), + "$dst = CONST32(#$global)", + [(set IntRegs:$dst, imm:$global) ]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_Label : LDInst<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst = CONST32($label)", + [(set IntRegs:$dst, (HexagonCONST32 bbl:$label))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST64_Int_Real : LDInst<(outs DoubleRegs:$dst), (ins i64imm:$global), + "$dst = CONST64(#$global)", + [(set DoubleRegs:$dst, imm:$global) ]>; + +def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), + "$dst = xor($dst, $dst)", + [(set PredRegs:$dst, 0)]>; + +def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpy($src1, $src2)", + [(set IntRegs:$dst, + (trunc (i64 (srl (i64 (mul (i64 (sext IntRegs:$src1)), + (i64 (sext IntRegs:$src2)))), + 
(i32 32)))))]>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+// For tail calls, a HexagonTCRet SDNode has three SDNode properties: a chain,
+// an optional glue, and variadic arguments.
+// Its single operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "Should never be emitted",
+ [(callseq_start timm:$amt)]>;
+}
+
+let Defs = [R29, R30, R31], Uses = [R29] in {
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "Should never be emitted",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALL : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>;
+}
+
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLR : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>;
+ }
+
+// Tail Calls.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtg : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtext : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNR : JInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "jumpr $dst // TAILCALL", []>;
+}
+// Map call instruction.
+def : Pat<(call IntRegs:$dst),
+ (CALLR IntRegs:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
+// Tail calls.
+def : Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNtg tglobaladdr:$dst)>;
+def : Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNtext texternalsym:$dst)>;
+def : Pat<(HexagonTCRet IntRegs:$dst),
+ (TCRETURNR IntRegs:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
+def : Pat <(and IntRegs:$src1, 65535),
+ (ZXTH IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def : Pat <(and IntRegs:$src1, 255),
+ (ZXTB IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1): for an i1 value, true is -1, and adding
+// it flips the bit.
+// Add(p1, false) should never be produced; if it is, it must be mapped to a
+// no-op.
+def : Pat <(add PredRegs:$src1, -1),
+ (NOT_pp PredRegs:$src1)>;
+
+// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
+// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
+def : Pat <(select (i1 (setlt IntRegs:$src1, IntRegs:$src2)), IntRegs:$src3,
+ IntRegs:$src4),
+ (TFR_condset_rr (CMPLTrr IntRegs:$src1, IntRegs:$src2), IntRegs:$src4,
+ IntRegs:$src3)>, Requires<[HasV2TOnly]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3),
+ (TFR_condset_ii PredRegs:$src1, s8ImmPred:$src3, s8ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
+ (JMP_PredNot PredRegs:$src1, bb:$offset)>;
+
+// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
+def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+ (AND_pnotp PredRegs:$src1, PredRegs:$src2)>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STrid_GP tglobaladdr:$global, 0, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(load (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDw_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (LDd_GP tglobaladdr:$global)>;
+
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress + 0), Pd = Rd.
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (TFR_PdRs (LDrib_GP tglobaladdr:$global, 0))>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDrih_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+// Map from load(globaladdress + x) -> memub(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memub(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriub_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memub(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// When the Interprocedural Global Variable optimizer realizes that a
+// certain global variable takes only two constant values, it shrinks the
+// global to a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in +def : Pat <(extloadi1 (HexagonCONST32_GP tglobaladdr:$global)), + (LDb_GP tglobaladdr:$global)>; + +let AddedComplexity = 100 in +def : Pat <(sextloadi1 (HexagonCONST32_GP tglobaladdr:$global)), + (LDb_GP tglobaladdr:$global)>; + +let AddedComplexity = 100 in +def : Pat <(zextloadi1 (HexagonCONST32_GP tglobaladdr:$global)), + (LDub_GP tglobaladdr:$global)>; + +// Map from load(globaladdress) -> memh(#foo). +let AddedComplexity = 100 in +def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)), + (LDh_GP tglobaladdr:$global)>; + +// Map from load(globaladdress) -> memh(#foo). +let AddedComplexity = 100 in +def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)), + (LDh_GP tglobaladdr:$global)>; + +// Map from load(globaladdress) -> memuh(#foo). +let AddedComplexity = 100 in +def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)), + (LDuh_GP tglobaladdr:$global)>; + +// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned. +def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)), + (AND_rr (LDrib ADDRriS11_0:$addr), (TFRI 0x1))>; + +// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo). +def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i32)), + (i64 (SXTW (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg)))>; + +// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)). +def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i16)), + (i64 (SXTW (SXTH (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>; + +// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)). +def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)), + (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>; + +// We want to prevent emiting pnot's as much as possible. +// Map brcond with an unsupported setcc to a JMP_PredNot. +def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset), + (JMP_PredNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; + +def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset), + (JMP_PredNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>; + +def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset), + (JMP_PredNot PredRegs:$src1, bb:$offset)>; + +def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset), + (JMP_Pred PredRegs:$src1, bb:$offset)>; + +def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset), + (JMP_PredNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>; + +def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset), + (JMP_Pred (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; + +def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)), + bb:$offset), + (JMP_PredNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1), + bb:$offset)>; + +def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset), + (JMP_PredNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; + +def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)), + bb:$offset), + (JMP_PredNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2), + bb:$offset)>; + +// Map from a 64-bit select to an emulated 64-bit mux. +// Hexagon does not support 64-bit MUXes; so emulate with combines. 
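+// e.g. Rdd = select(Pu, Rss, Rtt) becomes
+//   Rdd = combine(mux(Pu, Rss.h, Rtt.h), mux(Pu, Rss.l, Rtt.l))
+// i.e. one 32-bit mux per half, glued back together with combine.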
+def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), + (COMBINE_rr + (MUX_rr PredRegs:$src1, + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src3, subreg_hireg)), + (MUX_rr PredRegs:$src1, + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src3, subreg_loreg)))>; + +// Map from a 1-bit select to logical ops. +// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). +def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3), + (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2), + (AND_pp (NOT_pp PredRegs:$src1), PredRegs:$src3))>; + +// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs. +def : Pat<(i1 (load ADDRriS11_2:$addr)), + (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>; + +// Map for truncating from 64 immediates to 32 bit immediates. +def : Pat<(i32 (trunc DoubleRegs:$src)), + (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))>; + +// Map for truncating from i64 immediates to i1 bit immediates. +def : Pat<(i1 (trunc DoubleRegs:$src)), + (i1 (TFR_PdRs (i32(EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>; + +// Map memw(Rs) = Rdd -> memw(Rs) = Rt. +def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr), + (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src, + subreg_loreg)))>; + +// Map memh(Rs) = Rdd -> memh(Rs) = Rt. +def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr), + (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src, + subreg_loreg)))>; + +// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0. +def : Pat<(store (i1 -1), ADDRriS11_2:$addr), + (STrib ADDRriS11_2:$addr, (TFRI 1))>; + +let AddedComplexity = 100 in +// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1; +// memw(#foo) = r0 +def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (STb_GP tglobaladdr:$global, (TFRI 1))>; + + +// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0. +def : Pat<(store (i1 -1), ADDRriS11_2:$addr), + (STrib ADDRriS11_2:$addr, (TFRI 1))>; + +// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt. +def : Pat<(store PredRegs:$src1, ADDRriS11_2:$addr), + (STrib ADDRriS11_2:$addr, (i32 (MUX_ii PredRegs:$src1, 1, 0)) )>; + +// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs). +// Hexagon_TODO: We can probably use combine but that will cost 2 instructions. +// Better way to do this? +def : Pat<(i64 (anyext IntRegs:$src1)), + (i64 (SXTW IntRegs:$src1))>; + +// Map cmple -> cmpgt. +// rs <= rt -> !(rs > rt). +def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)), + (i1 (NOT_Ps (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>; + +// rs <= rt -> !(rs > rt). +def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)), + (i1 (NOT_Ps (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>; + +// Rss <= Rtt -> !(Rss > Rtt). +def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)), + (i1 (NOT_Ps (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>; + +// Map cmpne -> cmpeq. +// Hexagon_TODO: We should improve on this. +// rs != rt -> !(rs == rt). +def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)), + (i1 (NOT_Ps(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>; + +// Map cmpne(Rs) -> !cmpeqe(Rs). +// rs != rt -> !(rs == rt). +def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)), + (i1 (NOT_Ps(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>; + +// Convert setne back to xor for hexagon since we compute w/ pred registers. 
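+// For single-bit predicates (p1 != p2) is exactly (p1 ^ p2):
+//   0^0 = 0, 0^1 = 1, 1^0 = 1, 1^1 = 0.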
+def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)), + (i1 (XOR_pp PredRegs:$src1, PredRegs:$src2))>; + +// Map cmpne(Rss) -> !cmpew(Rss). +// rs != rt -> !(rs == rt). +def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)), + (i1 (NOT_Ps(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>; + +// Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt). +// rs >= rt -> !(rt > rs). +def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)), + (i1 (NOT_Ps(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>; + +def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)), + (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>; + +// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). +// rss >= rtt -> !(rtt > rss). +def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)), + (i1 (NOT_Ps(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>; + +// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). +// rs < rt -> !(rs >= rt). +def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), + (i1 (NOT_Ps (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>; + +// Map cmplt(Rs, Rt) -> cmplt(Rs, Rt). +// rs < rt -> rs < rt. Let assembler map it. +def : Pat <(i1 (setlt IntRegs:$src1, IntRegs:$src2)), + (i1 (CMPLTrr IntRegs:$src2, IntRegs:$src1))>; + +// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss). +// rss < rtt -> (rtt > rss). +def : Pat <(i1 (setlt DoubleRegs:$src1, DoubleRegs:$src2)), + (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))>; + +// Map from cmpltu(Rs, Rd) -> !cmpgtu(Rs, Rd - 1). +// rs < rt -> rt > rs. +def : Pat <(i1 (setult IntRegs:$src1, IntRegs:$src2)), + (i1 (CMPGTUrr IntRegs:$src2, IntRegs:$src1))>; + +// Map from cmpltu(Rss, Rdd) -> !cmpgtu(Rss, Rdd - 1). +// rs < rt -> rt > rs. +def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)), + (i1 (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1))>; + +// Map from Rs >= Rt -> !(Rt > Rs). +// rs >= rt -> !(rt > rs). +def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)), + (i1 (NOT_Ps (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>; + +// Map from Rs >= Rt -> !(Rt > Rs). +// rs >= rt -> !(rt > rs). +def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)), + (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>; + +// Map from cmpleu(Rs, Rs) -> !cmpgtu(Rs, Rs). +// Map from (Rs <= Rt) -> !(Rs > Rt). +def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)), + (i1 (NOT_Ps (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>; + +// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). +// Map from (Rs <= Rt) -> !(Rs > Rt). +def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)), + (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>; + +// Sign extends. +// i1 -> i32 +def : Pat <(i32 (sext PredRegs:$src1)), + (i32 (MUX_ii PredRegs:$src1, -1, 0))>; + +// Convert sign-extended load back to load and sign extend. +// i8 -> i64 +def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)), + (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>; + +// Convert any-extended load back to load and sign extend. +// i8 -> i64 +def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)), + (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>; + +// Convert sign-extended load back to load and sign extend. +// i16 -> i64 +def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)), + (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>; + +// Convert sign-extended load back to load and sign extend. +// i32 -> i64 +def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)), + (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>; + + +// Zero extends. 
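+// A predicate is widened with a mux (#1 or #0); a 32-bit register is widened
+// to 64 bits with combine(#0, Rs), i.e. the upper word is simply zero.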
+// i1 -> i32 +def : Pat <(i32 (zext PredRegs:$src1)), + (i32 (MUX_ii PredRegs:$src1, 1, 0))>; + +// i1 -> i64 +def : Pat <(i64 (zext PredRegs:$src1)), + (i64 (COMBINE_rr (TFRI 0), (MUX_ii PredRegs:$src1, 1, 0)))>; + +// i32 -> i64 +def : Pat <(i64 (zext IntRegs:$src1)), + (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>; + +// i8 -> i64 +def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>; + +// i16 -> i64 +def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>; + +// i32 -> i64 +def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; + +def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), + (i32 (LDriw ADDRriS11_0:$src1))>; + +// Map from Rs = Pd to Pd = mux(Pd, #1, #0) +def : Pat <(i32 (zext PredRegs:$src1)), + (i32 (MUX_ii PredRegs:$src1, 1, 0))>; + +// Map from Rs = Pd to Pd = mux(Pd, #1, #0) +def : Pat <(i32 (anyext PredRegs:$src1)), + (i32 (MUX_ii PredRegs:$src1, 1, 0))>; + +// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) +def : Pat <(i64 (anyext PredRegs:$src1)), + (i64 (SXTW (i32 (MUX_ii PredRegs:$src1, 1, 0))))>; + + +// Any extended 64-bit load. +// anyext i32 -> i64 +def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; + +// anyext i16 -> i64. +def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>; + +// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs). +def : Pat<(i64 (zext IntRegs:$src1)), + (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>; + +// Multiply 64-bit unsigned and use upper result. +def : Pat <(mulhu DoubleRegs:$src1, DoubleRegs:$src2), + (MPYU64_acc(COMBINE_rr (TFRI 0), + (EXTRACT_SUBREG + (LSRd_ri(MPYU64_acc(MPYU64_acc(COMBINE_rr (TFRI 0), + (EXTRACT_SUBREG (LSRd_ri(MPYU64 + (EXTRACT_SUBREG DoubleRegs:$src1, + subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, + subreg_loreg)), + 32) ,subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, + subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src2, + subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)), + 32),subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg) + )>; + +// Multiply 64-bit signed and use upper result. +def : Pat <(mulhs DoubleRegs:$src1, DoubleRegs:$src2), + (MPY64_acc(COMBINE_rr (TFRI 0), + (EXTRACT_SUBREG + (LSRd_ri(MPY64_acc(MPY64_acc(COMBINE_rr (TFRI 0), + (EXTRACT_SUBREG (LSRd_ri(MPYU64 + (EXTRACT_SUBREG DoubleRegs:$src1, + subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, + subreg_loreg)), + 32) ,subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, + subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src2, + subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)), + 32),subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg) + )>; + +// Hexagon specific ISD nodes. +def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>; +def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC", + SDTHexagonADJDYNALLOC>; +// Needed to tag these instructions for stack layout. 
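+// ADJDYNALLOC is what dynamic stack allocations (alloca) lower to; it is just
+// an add of an immediate, but keeping it as a distinct opcode lets the stack
+// layout code find and adjust these allocations afterwards.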
+let usesCustomInserter = 1 in +def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, + s16Imm:$src2), + "$dst = add($src1, #$src2)", + [(set IntRegs:$dst, (Hexagon_ADJDYNALLOC IntRegs:$src1, + s16ImmPred:$src2))]>; + +def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, []>; +def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; +def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + [(set IntRegs:$dst, (Hexagon_ARGEXTEND IntRegs:$src1))]>; + +let AddedComplexity = 100 in +def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND IntRegs:$src1), i16)), + (TFR IntRegs:$src1)>; + + +def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; + +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in +def BR_JT : JRInst<(outs), (ins IntRegs:$src), + "jumpr $src", + [(HexagonBR_JT IntRegs:$src)]>; +def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; + +def : Pat<(HexagonWrapperJT tjumptable:$dst), + (CONST32_set_jt tjumptable:$dst)>; + + +//===----------------------------------------------------------------------===// +// V3 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV3.td" + +//===----------------------------------------------------------------------===// +// V3 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V4 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV4.td" diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td new file mode 100644 index 0000000..a73897e --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -0,0 +1,134 @@ +//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// J + +//===----------------------------------------------------------------------===// +// Call subroutine. +let isCall = 1, neverHasSideEffects = 1, + Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, + P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { + def CALLv3 : JInst<(outs), (ins calltarget:$dst, variable_ops), + "call $dst", []>, Requires<[HasV3T]>; +} + +//===----------------------------------------------------------------------===// +// J - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// +// Call subroutine from register. 
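+// As for CALLv3 above, the Defs list models everything a call may clobber:
+// r0-r15 (as the register pairs d0-d7), r28, the link register r31, the
+// predicate registers and the hardware-loop registers lc0/sa0 and lc1/sa1.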
+let isCall = 1, neverHasSideEffects = 1, + Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, + P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { + def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst, variable_ops), + "callr $dst", + []>, Requires<[HasV3TOnly]>; + } + + +// if(p?.new) jumpr:t r? +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) jumpr:t $src2", + []>, Requires<[HasV3T]>; +} + +// if (!p?.new) jumpr:t r? +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cNotPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) jumpr:t $src2", + []>, Requires<[HasV3T]>; +} + +// Not taken. +// if(p?.new) jumpr:nt r? +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) jumpr:nt $src2", + []>, Requires<[HasV3T]>; +} + +// if (!p?.new) jumpr:nt r? +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cNotPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) jumpr:nt $src2", + []>, Requires<[HasV3T]>; +} + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/ALU + +//===----------------------------------------------------------------------===// + +let AddedComplexity = 200 in +def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = max($src2, $src1)", + [(set DoubleRegs:$dst, (select (i1 (setlt DoubleRegs:$src2, + DoubleRegs:$src1)), + DoubleRegs:$src1, + DoubleRegs:$src2))]>, +Requires<[HasV3T]>; + +let AddedComplexity = 200 in +def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = min($src2, $src1)", + [(set DoubleRegs:$dst, (select (i1 (setgt DoubleRegs:$src2, + DoubleRegs:$src1)), + DoubleRegs:$src1, + DoubleRegs:$src2))]>, +Requires<[HasV3T]>; + +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + + + + +//def : Pat <(brcond (i1 (seteq IntRegs:$src1, 0)), bb:$offset), +// (JMP_RegEzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setne IntRegs:$src1, 0)), bb:$offset), +// (JMP_RegNzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setle IntRegs:$src1, 0)), bb:$offset), +// (JMP_RegLezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setge IntRegs:$src1, 0)), bb:$offset), +// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setgt IntRegs:$src1, -1)), bb:$offset), +// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + + +// Map call instruction +def : Pat<(call IntRegs:$dst), + (CALLRv3 IntRegs:$dst)>, Requires<[HasV3T]>; +def : Pat<(call tglobaladdr:$dst), + (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>; +def : Pat<(call texternalsym:$dst), + (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>; diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td new file mode 100644 index 0000000..24218d0 --- /dev/null +++ 
b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -0,0 +1,3392 @@ +//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V4 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +// Hexagon V4 Architecture spec defines 8 instruction classes: +// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the +// compiler) + +// LD Instructions: +// ======================================== +// Loads (8/16/32/64 bit) +// Deallocframe + +// ST Instructions: +// ======================================== +// Stores (8/16/32/64 bit) +// Allocframe + +// ALU32 Instructions: +// ======================================== +// Arithmetic / Logical (32 bit) +// Vector Halfword + +// XTYPE Instructions (32/64 bit): +// ======================================== +// Arithmetic, Logical, Bit Manipulation +// Multiply (Integer, Fractional, Complex) +// Permute / Vector Permute Operations +// Predicate Operations +// Shift / Shift with Add/Sub/Logical +// Vector Byte ALU +// Vector Halfword (ALU, Shift, Multiply) +// Vector Word (ALU, Shift) + +// J Instructions: +// ======================================== +// Jump/Call PC-relative + +// JR Instructions: +// ======================================== +// Jump/Call Register + +// MEMOP Instructions: +// ======================================== +// Operation on memory (8/16/32 bit) + +// NV Instructions: +// ======================================== +// New-value Jumps +// New-value Stores + +// CR Instructions: +// ======================================== +// Control-Register Transfers +// Hardware Loop Setup +// Predicate Logicals & Reductions + +// SYSTEM Instructions (not implemented in the compiler): +// ======================================== +// Prefetch +// Cache Maintenance +// Bus Operations + + +//===----------------------------------------------------------------------===// +// ALU32 + +//===----------------------------------------------------------------------===// + +// Shift halfword. 
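+// The predicated variants in this file follow a common naming scheme:
+//   _cPt   : if (Pv)         _cNotPt   : if (!Pv)
+//   _cdnPt : if (Pv.new)     _cdnNotPt : if (!Pv.new)
+// where the .new forms consume a predicate produced earlier in the same packet.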
+ +def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1) $dst = aslh($src2)", + []>, + Requires<[HasV4T]>; + +def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1) $dst = aslh($src2)", + []>, + Requires<[HasV4T]>; + +def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) $dst = aslh($src2)", + []>, + Requires<[HasV4T]>; + +def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) $dst = aslh($src2)", + []>, + Requires<[HasV4T]>; + +def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1) $dst = asrh($src2)", + []>, + Requires<[HasV4T]>; + +def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1) $dst = asrh($src2)", + []>, + Requires<[HasV4T]>; + +def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) $dst = asrh($src2)", + []>, + Requires<[HasV4T]>; + +def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) $dst = asrh($src2)", + []>, + Requires<[HasV4T]>; + +// Sign extend. + +def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1) $dst = sxtb($src2)", + []>, + Requires<[HasV4T]>; + +def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1) $dst = sxtb($src2)", + []>, + Requires<[HasV4T]>; + +def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) $dst = sxtb($src2)", + []>, + Requires<[HasV4T]>; + +def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) $dst = sxtb($src2)", + []>, + Requires<[HasV4T]>; + + +def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1) $dst = sxth($src2)", + []>, + Requires<[HasV4T]>; + +def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1) $dst = sxth($src2)", + []>, + Requires<[HasV4T]>; + +def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) $dst = sxth($src2)", + []>, + Requires<[HasV4T]>; + +def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) $dst = sxth($src2)", + []>, + Requires<[HasV4T]>; + +// Zero exten. 
+ +let neverHasSideEffects = 1 in +def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1) $dst = zxtb($src2)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1 in +def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1) $dst = zxtb($src2)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1 in +def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) $dst = zxtb($src2)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1 in +def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) $dst = zxtb($src2)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1 in +def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1) $dst = zxth($src2)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1 in +def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1) $dst = zxth($src2)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1 in +def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) $dst = zxth($src2)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1 in +def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) $dst = zxth($src2)", + []>, + Requires<[HasV4T]>; + + +//===----------------------------------------------------------------------===// +// ALU32 - +//===----------------------------------------------------------------------===// + + + +//===----------------------------------------------------------------------===// +// LD + +//===----------------------------------------------------------------------===// +/// +/// Make sure that in post increment load, the first operand is always the post +/// increment operand. +/// +//// Load doubleword. +// Rdd=memd(Re=#U6) + +// Rdd=memd(Rs+Rt<<#u2) +// Special case pattern for indexed load without offset which is easier to +// match. AddedComplexity of this pattern should be lower than base+offset load +// and lower yet than the more generic version with offset/shift below +// Similar approach is taken for all other base+index loads. +let AddedComplexity = 10, isPredicable = 1 in +def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memd($src1+$src2<<#0)", + [(set DoubleRegs:$dst, (load (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40, isPredicable = 1 in +def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memd($src1+$src2<<#$offset)", + [(set DoubleRegs:$dst, (load (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +//// Load doubleword conditionally. 
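+// The conditional forms below carry no selection pattern ([]); they are meant
+// to be introduced later, e.g. by if-conversion, so only the mayLoad flag and
+// the assembly syntax matter here. The same holds for the other predicated
+// loads and stores in this file.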
+// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2) +// if (Pv) Rd=memd(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memd($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memd(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memd($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memd(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memd($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memd(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memd($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memd(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memd($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memd(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memd($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memd(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memd($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memd(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// Rdd=memd(Rt<<#u2+#U6) + +//// Load byte. 
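+// Byte loads come in sign-extending (memb), zero-extending (memub) and
+// any-extending (_ae, also memub) flavours, each with plain base+index and
+// base+index<<#shift addressing.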
+// Rd=memb(Re=#U6) + +// Rd=memb(Rs+Rt<<#u2) +let AddedComplexity = 10, isPredicable = 1 in +def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memb($src1+$src2<<#0)", + [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10, isPredicable = 1 in +def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memub($src1+$src2<<#0)", + [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10, isPredicable = 1 in +def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memub($src1+$src2<<#0)", + [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40, isPredicable = 1 in +def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memb($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (sextloadi8 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40, isPredicable = 1 in +def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memub($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (zextloadi8 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40, isPredicable = 1 in +def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memub($src1+$src2<<#$offset)", + [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +//// Load byte conditionally. 
+// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2) +// if (Pv) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memb($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memb($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memb($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memb($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memb($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memb($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memb($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +//// Load unsigned byte conditionally. 
+// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2) +// if (Pv) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memub($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memub($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memub($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memub($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memub($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memub($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memub($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// Rd=memb(Rt<<#u2+#U6) + +//// Load halfword +// Rd=memh(Re=#U6) + +// Rd=memh(Rs+Rt<<#u2) +let AddedComplexity = 10, isPredicable = 1 in +def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memh($src1+$src2<<#0)", + [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10, isPredicable = 1 in +def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memuh($src1+$src2<<#0)", + [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10, isPredicable = 1 in +def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memuh($src1+$src2<<#0)", + [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +// Rd=memh(Rs+Rt<<#u2) +let AddedComplexity = 40, isPredicable = 1 in +def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memh($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (sextloadi16 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + 
+let AddedComplexity = 40, isPredicable = 1 in +def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memuh($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (zextloadi16 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40, isPredicable = 1 in +def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memuh($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (extloadi16 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +//// Load halfword conditionally. +// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2) +// if (Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +//// Load unsigned halfword conditionally. 
+// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2) +// if (Pv) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memuh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memuh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memuh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memuh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memuh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memuh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// Rd=memh(Rt<<#u2+#U6) + +//// Load word. +// Rd=memw(Re=#U6) + +// Rd=memw(Rs+Rt<<#u2) +let AddedComplexity = 10, isPredicable = 1 in +def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memw($src1+$src2<<#0)", + [(set IntRegs:$dst, (load (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +// Rd=memw(Rs+Rt<<#u2) +let AddedComplexity = 40, isPredicable = 1 in +def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memw($src1+$src2<<#$offset)", + [(set IntRegs:$dst, (load (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +//// Load word conditionally. 
+// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2) +// if (Pv) Rd=memw(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memw($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memw($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memw($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memw($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memw($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memw($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memw($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// Rd=memw(Rt<<#u2+#U6) + + +// Post-inc Load, Predicated, Dot new + + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), + "if ($src1.new) $dst1 = memd($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), + "if (!$src1.new) $dst1 = memd($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if ($src1.new) $dst1 = memb($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if (!$src1.new) $dst1 = memb($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, 
neverHasSideEffects = 1 in +def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if ($src1.new) $dst1 = memh($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if (!$src1.new) $dst1 = memh($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if ($src1.new) $dst1 = memub($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if (!$src1.new) $dst1 = memub($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if ($src1.new) $dst1 = memuh($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if (!$src1.new) $dst1 = memuh($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), + "if ($src1.new) $dst1 = memw($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), + "if (!$src1.new) $dst1 = memw($src2++#$src3)", + [], + "$src2 = $dst2">, + Requires<[HasV4T]>; + + +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ST + +//===----------------------------------------------------------------------===// +/// +/// Assumptions::: ****** DO NOT IGNORE ******** +/// 1. Make sure that in post increment store, the zero'th operand is always the +/// post increment operand. +/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the +/// last operand. +/// + +// Store doubleword. 
+// memd(Re=#U6)=Rtt +// TODO: needs to be implemented + +// memd(Rs+#s11:3)=Rtt +// memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, isPredicable = 1 in +def STrid_indexed_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4), + "memd($src1+$src2<<#$src3) = $src4", + [(store DoubleRegs:$src4, (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memd(Ru<<#u2+#U6)=Rtt +let AddedComplexity = 10 in +def STrid_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4), + "memd($src1<<#$src2+#$src3) = $src4", + [(store DoubleRegs:$src4, (shl IntRegs:$src1, + (add u2ImmPred:$src2, + u6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memd(Rx++#s4:3)=Rtt +// memd(Rx++#s4:3:circ(Mu))=Rtt +// memd(Rx++I:circ(Mu))=Rtt +// memd(Rx++Mu)=Rtt +// memd(Rx++Mu:brev)=Rtt +// memd(gp+#u16:3)=Rtt + +// Store doubleword conditionally. +// if ([!]Pv[.new]) memd(#u6)=Rtt +// TODO: needs to be implemented. + +// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt +// if (Pv) memd(Rs+#u6:3)=Rtt +// if (Pv.new) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2), + "if ($src1.new) memd($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memd(Rs+#u6:3)=Rtt +// if (!Pv.new) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2), + "if (!$src1.new) memd($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memd(Rs+#u6:3)=Rtt +// if (Pv.new) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_indexed_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3, + DoubleRegs:$src4), + "if ($src1.new) memd($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memd(Rs+#u6:3)=Rtt +// if (!Pv.new) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_indexed_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3, + DoubleRegs:$src4), + "if (!$src1.new) memd($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt +// if (Pv) memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_indexed_shl_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + DoubleRegs:$src5), + "if ($src1) memd($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_indexed_shl_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + DoubleRegs:$src5), + "if ($src1) memd($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; +// if (!Pv) memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_indexed_shl_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + DoubleRegs:$src5), + "if (!$src1) memd($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; +// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + DoubleRegs:$src5), 
+ "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt +// if (Pv) memd(Rx++#s4:3)=Rtt +// if (Pv.new) memd(Rx++#s4:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, + s4_3Imm:$offset), + "if ($src1.new) memd($src3++#$offset) = $src2", + [], + "$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memd(Rx++#s4:3)=Rtt +// if (!Pv.new) memd(Rx++#s4:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, + s4_3Imm:$offset), + "if (!$src1.new) memd($src3++#$offset) = $src2", + [], + "$src3 = $dst">, + Requires<[HasV4T]>; + + +// Store byte. +// memb(Re=#U6)=Rt +// TODO: needs to be implemented. +// memb(Rs+#s11:0)=Rt +// memb(Rs+#u6:0)=#S8 +let AddedComplexity = 10, isPredicable = 1 in +def STrib_imm_V4 : STInst<(outs), + (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3), + "memb($src1+#$src2) = #$src3", + [(truncstorei8 s8ImmPred:$src3, (add IntRegs:$src1, + u6_0ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// memb(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, isPredicable = 1 in +def STrib_indexed_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memb($src1+$src2<<#$src3) = $src4", + [(truncstorei8 IntRegs:$src4, (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memb(Ru<<#u2+#U6)=Rt +let AddedComplexity = 10 in +def STrib_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memb($src1<<#$src2+#$src3) = $src4", + [(truncstorei8 IntRegs:$src4, (shl IntRegs:$src1, + (add u2ImmPred:$src2, + u6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memb(Rx++#s4:0:circ(Mu))=Rt +// memb(Rx++I:circ(Mu))=Rt +// memb(Rx++Mu)=Rt +// memb(Rx++Mu:brev)=Rt +// memb(gp+#u16:0)=Rt + + +// Store byte conditionally. 
+// if ([!]Pv[.new]) memb(#u6)=Rt +// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6 +// if (Pv) memb(Rs+#u6:0)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_imm_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if ($src1) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+#u6:0)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_imm_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if ($src1.new) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_imm_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if (!$src1) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+#u6:0)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_imm_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if (!$src1.new) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt +// if (Pv) memb(Rs+#u6:0)=Rt +// if (Pv.new) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memb($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=Rt +// if (!Pv.new) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memb($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memb(Rs+#u6:0)=Rt +// if (!Pv) memb(Rs+#u6:0)=Rt +// if (Pv.new) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if ($src1.new) memb($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memb($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt +// if (Pv) memb(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memb($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memb($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memb($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// 
if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt +// if (Pv) memb(Rx++#s4:0)=Rt +// if (Pv.new) memb(Rx++#s4:0)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if ($src1.new) memb($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rx++#s4:0)=Rt +// if (!Pv.new) memb(Rx++#s4:0)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if (!$src1.new) memb($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +// Store halfword. +// memh(Re=#U6)=Rt.H +// TODO: needs to be implemented + +// memh(Re=#U6)=Rt +// TODO: needs to be implemented + +// memh(Rs+#s11:1)=Rt.H +// memh(Rs+#s11:1)=Rt +// memh(Rs+#u6:1)=#S8 +let AddedComplexity = 10, isPredicable = 1 in +def STrih_imm_V4 : STInst<(outs), + (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3), + "memh($src1+#$src2) = #$src3", + [(truncstorei16 s8ImmPred:$src3, (add IntRegs:$src1, + u6_1ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// memh(Rs+Ru<<#u2)=Rt.H +// TODO: needs to be implemented. + +// memh(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, isPredicable = 1 in +def STrih_indexed_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memh($src1+$src2<<#$src3) = $src4", + [(truncstorei16 IntRegs:$src4, (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memh(Ru<<#u2+#U6)=Rt.H +// memh(Ru<<#u2+#U6)=Rt +let AddedComplexity = 10 in +def STrih_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memh($src1<<#$src2+#$src3) = $src4", + [(truncstorei16 IntRegs:$src4, (shl IntRegs:$src1, + (add u2ImmPred:$src2, + u6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memh(Rx++#s4:1:circ(Mu))=Rt.H +// memh(Rx++#s4:1:circ(Mu))=Rt +// memh(Rx++I:circ(Mu))=Rt.H +// memh(Rx++I:circ(Mu))=Rt +// memh(Rx++Mu)=Rt.H +// memh(Rx++Mu)=Rt +// memh(Rx++Mu:brev)=Rt.H +// memh(Rx++Mu:brev)=Rt +// memh(gp+#u16:1)=Rt.H +// memh(gp+#u16:1)=Rt + + +// Store halfword conditionally. +// if ([!]Pv[.new]) memh(#u6)=Rt.H +// if ([!]Pv[.new]) memh(#u6)=Rt + +// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6 +// if (Pv) memh(Rs+#u6:1)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_imm_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), + "if ($src1) memh($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rs+#u6:1)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_imm_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), + "if ($src1.new) memh($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rs+#u6:1)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_imm_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), + "if (!$src1) memh($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memh(Rs+#u6:1)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_imm_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), + "if (!$src1.new) memh($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H +// TODO: needs to be implemented. 
+ +// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt +// if (Pv) memh(Rs+#u6:1)=Rt +// if (Pv.new) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memh($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rs+#u6:1)=Rt +// if (!Pv.new) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memh($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if ($src1.new) memh($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memh($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H +// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt +// if (Pv) memh(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrih_indexed_shl_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memh($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rs+Ru<<#u2)=Rt +def STrih_indexed_shl_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memh($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrih_indexed_shl_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memh($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H +// TODO: Needs to be implemented. + +// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt +// if (Pv) memh(Rx++#s4:1)=Rt +// if (Pv.new) memh(Rx++#s4:1)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if ($src1.new) memh($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rx++#s4:1)=Rt +// if (!Pv.new) memh(Rx++#s4:1)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if (!$src1.new) memh($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +// Store word. +// memw(Re=#U6)=Rt +// TODO: Needs to be implemented. 
+ +// memw(Rs+#s11:2)=Rt +// memw(Rs+#u6:2)=#S8 +let AddedComplexity = 10, isPredicable = 1 in +def STriw_imm_V4 : STInst<(outs), + (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3), + "memw($src1+#$src2) = #$src3", + [(store s8ImmPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// memw(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, isPredicable = 1 in +def STriw_indexed_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memw($src1+$src2<<#$src3) = $src4", + [(store IntRegs:$src4, (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memw(Ru<<#u2+#U6)=Rt +let AddedComplexity = 10 in +def STriw_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memw($src1<<#$src2+#$src3) = $src4", + [(store IntRegs:$src4, (shl IntRegs:$src1, + (add u2ImmPred:$src2, u6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memw(Rx++#s4:2)=Rt +// memw(Rx++#s4:2:circ(Mu))=Rt +// memw(Rx++I:circ(Mu))=Rt +// memw(Rx++Mu)=Rt +// memw(Rx++Mu:brev)=Rt +// memw(gp+#u16:2)=Rt + + +// Store word conditionally. +// if ([!]Pv[.new]) memw(#u6)=Rt +// TODO: Needs to be implemented. + +// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6 +// if (Pv) memw(Rs+#u6:2)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_imm_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), + "if ($src1) memw($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+#u6:2)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_imm_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), + "if ($src1.new) memw($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+#u6:2)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_imm_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), + "if (!$src1) memw($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+#u6:2)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_imm_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), + "if (!$src1.new) memw($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt +// if (Pv) memw(Rs+#u6:2)=Rt +// if (Pv.new) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memw($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+#u6:2)=Rt +// if (!Pv.new) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memw($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memw(Rs+#u6:2)=Rt +// if (!Pv) memw(Rs+#u6:2)=Rt +// if (Pv.new) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if ($src1.new) memw($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memw($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt +// if (Pv) 
memw(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt +// if (Pv) memw(Rx++#s4:2)=Rt +// if (Pv.new) memw(Rx++#s4:2)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1.new) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rx++#s4:2)=Rt +// if (!Pv.new) memw(Rx++#s4:2)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1.new) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +//===----------------------------------------------------------------------=== +// ST - +//===----------------------------------------------------------------------=== + + +//===----------------------------------------------------------------------===// +// NV/ST + +//===----------------------------------------------------------------------===// + +// Store new-value byte. 
+ +// memb(Re=#U6)=Nt.new +// memb(Rs+#s11:0)=Nt.new +let mayStore = 1, isPredicable = 1 in +def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1), + "memb($addr) = $src1.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, isPredicable = 1 in +def STrib_indexed_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3), + "memb($src1+#$src2) = $src3.new", + []>, + Requires<[HasV4T]>; + +// memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in +def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memb($src1+$src2<<#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memb(Ru<<#u2+#U6)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memb($src1<<#$src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in +def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset), + "memb($src2++#$offset) = $src1.new", + [], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// memb(Rx++#s4:0:circ(Mu))=Nt.new +// memb(Rx++I:circ(Mu))=Nt.new +// memb(Rx++Mu)=Nt.new +// memb(Rx++Mu:brev)=Nt.new + +// memb(gp+#u16:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_GP_nv_V4 : NVInst_V4<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memb(#$global+$offset) = $src.new", + []>, + Requires<[HasV4T]>; + + +// Store new-value byte conditionally. +// if ([!]Pv[.new]) memb(#u6)=Nt.new +// if (Pv) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memb($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memb($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memb($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memb($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if ($src1) memb($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if ($src1.new) memb($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if (!$src1) memb($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) 
memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memb($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + + +// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new +// if (Pv) memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memb($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new +// if (Pv) memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if ($src1) memb($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if ($src1.new) memb($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if (!$src1) memb($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if (!$src1.new) memb($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +// Store new-value halfword. 
+// memh(Re=#U6)=Nt.new +// memh(Rs+#s11:1)=Nt.new +let mayStore = 1, isPredicable = 1 in +def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1), + "memh($addr) = $src1.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, isPredicable = 1 in +def STrih_indexed_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3), + "memh($src1+#$src2) = $src3.new", + []>, + Requires<[HasV4T]>; + +// memh(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in +def STrih_indexed_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memh($src1+$src2<<#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memh(Ru<<#u2+#U6)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrih_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memh($src1<<#$src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memh(Rx++#s4:1)=Nt.new +let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in +def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset), + "memh($src2++#$offset) = $src1.new", + [], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// memh(Rx++#s4:1:circ(Mu))=Nt.new +// memh(Rx++I:circ(Mu))=Nt.new +// memh(Rx++Mu)=Nt.new +// memh(Rx++Mu:brev)=Nt.new + +// memh(gp+#u16:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_GP_nv_V4 : NVInst_V4<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memh(#$global+$offset) = $src.new", + []>, + Requires<[HasV4T]>; + + +// Store new-value halfword conditionally. + +// if ([!]Pv[.new]) memh(#u6)=Nt.new + +// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new +// if (Pv) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memh($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memh($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memh($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memh($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if ($src1) memh($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if ($src1.new) memh($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if (!$src1) memh($src2+#$src3) = $src4.new", 
+ []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memh($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new +// if (Pv) memh(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memh($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[]) memh(Rx++#s4:1)=Nt.new +// if (Pv) memh(Rx++#s4:1)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if ($src1) memh($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rx++#s4:1)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if ($src1.new) memh($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rx++#s4:1)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if (!$src1) memh($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv.new) memh(Rx++#s4:1)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if (!$src1.new) memh($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +// Store new-value word. 
+ +// memw(Re=#U6)=Nt.new +// memw(Rs+#s11:2)=Nt.new +let mayStore = 1, isPredicable = 1 in +def STriw_nv_V4 : NVInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$src1), + "memw($addr) = $src1.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, isPredicable = 1 in +def STriw_indexed_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3), + "memw($src1+#$src2) = $src3.new", + []>, + Requires<[HasV4T]>; + +// memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in +def STriw_indexed_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memw($src1+$src2<<#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memw(Ru<<#u2+#U6)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memw($src1<<#$src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in +def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset), + "memw($src2++#$offset) = $src1.new", + [], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// memw(Rx++#s4:2:circ(Mu))=Nt.new +// memw(Rx++I:circ(Mu))=Nt.new +// memw(Rx++Mu)=Nt.new +// memw(Rx++Mu:brev)=Nt.new +// memw(gp+#u16:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_GP_nv_V4 : NVInst_V4<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memw(#$global+$offset) = $src.new", + []>, + Requires<[HasV4T]>; + + +// Store new-value word conditionally. + +// if ([!]Pv[.new]) memw(#u6)=Nt.new + +// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new +// if (Pv) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memw($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memw($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memw($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memw($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if ($src1) memw($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if ($src1.new) memw($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if (!$src1) memw($src2+#$src3) = $src4.new", + 
[]>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memw($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + + +// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new +// if (Pv) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new +// if (Pv) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1.new) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1.new) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +//===----------------------------------------------------------------------===// +// NV/ST - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// XTYPE/ALU + +//===----------------------------------------------------------------------===// + +// Add and accumulate. 
+// Rd=add(Rs,add(Ru,#s6)) +def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3), + "$dst = add($src1, add($src2, #$src3))", + [(set IntRegs:$dst, + (add IntRegs:$src1, (add IntRegs:$src2, s6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// Rd=add(Rs,sub(#s6,Ru)) +def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), + "$dst = add($src1, sub(#$src2, $src3))", + [(set IntRegs:$dst, + (add IntRegs:$src1, (sub s6ImmPred:$src2, IntRegs:$src3)))]>, + Requires<[HasV4T]>; + +// Generates the same instruction as ADDr_SUBri_V4 but matches different +// pattern. +// Rd=add(Rs,sub(#s6,Ru)) +def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), + "$dst = add($src1, sub(#$src2, $src3))", + [(set IntRegs:$dst, + (sub (add IntRegs:$src1, s6ImmPred:$src2), IntRegs:$src3))]>, + Requires<[HasV4T]>; + + +// Add or subtract doublewords with carry. +//TODO: +// Rdd=add(Rss,Rtt,Px):carry +//TODO: +// Rdd=sub(Rss,Rtt,Px):carry + + +// Logical doublewords. +// Rdd=and(Rtt,~Rss) +def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2), + "$dst = and($src1, ~$src2)", + [(set DoubleRegs:$dst, (and DoubleRegs:$src1, + (not DoubleRegs:$src2)))]>, + Requires<[HasV4T]>; + +// Rdd=or(Rtt,~Rss) +def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2), + "$dst = or($src1, ~$src2)", + [(set DoubleRegs:$dst, + (or DoubleRegs:$src1, (not DoubleRegs:$src2)))]>, + Requires<[HasV4T]>; + + +// Logical-logical doublewords. +// Rxx^=xor(Rss,Rtt) +def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), + "$dst ^= xor($src2, $src3)", + [(set DoubleRegs:$dst, + (xor DoubleRegs:$src1, (xor DoubleRegs:$src2, DoubleRegs:$src3)))], + "$src1 = $dst">, + Requires<[HasV4T]>; + + +// Logical-logical words. 
+// Rx=or(Ru,and(Rx,#s10))
+def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+            "$dst = or($src1, and($src2, #$src3))",
+            [(set IntRegs:$dst,
+                  (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,Rt)
+// Rx&=and(Rs,Rt)
+def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= and($src2, $src3)",
+            [(set IntRegs:$dst,
+                  (and IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=and(Rs,Rt)
+def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= and($src2, $src3)",
+            [(set IntRegs:$dst,
+                  (or IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx^=and(Rs,Rt)
+def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= and($src2, $src3)",
+            [(set IntRegs:$dst,
+                  (xor IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,~Rt)
+// Rx&=and(Rs,~Rt)
+def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= and($src2, ~$src3)",
+            [(set IntRegs:$dst,
+                  (and IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=and(Rs,~Rt)
+def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= and($src2, ~$src3)",
+            [(set IntRegs:$dst,
+                  (or IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx^=and(Rs,~Rt)
+def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= and($src2, ~$src3)",
+            [(set IntRegs:$dst,
+                  (xor IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=or(Rs,Rt)
+// Rx&=or(Rs,Rt)
+def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= or($src2, $src3)",
+            [(set IntRegs:$dst,
+                  (and IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=or(Rs,Rt)
+def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= or($src2, $src3)",
+            [(set IntRegs:$dst,
+                  (or IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx^=or(Rs,Rt)
+def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= or($src2, $src3)",
+            [(set IntRegs:$dst,
+                  (xor IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=xor(Rs,Rt)
+// Rx&=xor(Rs,Rt)
+def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= xor($src2, $src3)",
+            [(set IntRegs:$dst,
+                  (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=xor(Rs,Rt)
+def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= xor($src2, $src3)",
+            [(set IntRegs:$dst,
+                  (or IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx^=xor(Rs,Rt)
+def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= xor($src2, $src3)",
+            [(set IntRegs:$dst,
+                  (xor IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=and(Rs,#s10)
+def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+            "$dst |= and($src2, #$src3)",
+            [(set IntRegs:$dst,
+                  (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=or(Rs,#s10)
+def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+            "$dst |= or($src2, #$src3)",
+            [(set IntRegs:$dst,
+                  (or IntRegs:$src1, (or IntRegs:$src2, s10ImmPred:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Multiply and use lower result.
+// Rd=add(#u6,mpyi(Rs,#U6)) +def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst), + (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3), + "$dst = add(#$src1, mpyi($src2, #$src3))", + [(set IntRegs:$dst, + (add (mul IntRegs:$src2, u6ImmPred:$src3), u6ImmPred:$src1))]>, + Requires<[HasV4T]>; + +// Rd=add(#u6,mpyi(Rs,Rt)) + +def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst), + (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst = add(#$src1, mpyi($src2, $src3))", + [(set IntRegs:$dst, + (add (mul IntRegs:$src2, IntRegs:$src3), u6ImmPred:$src1))]>, + Requires<[HasV4T]>; + +// Rd=add(Ru,mpyi(#u6:2,Rs)) +def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3), + "$dst = add($src1, mpyi(#$src2, $src3))", + [(set IntRegs:$dst, + (add IntRegs:$src1, (mul IntRegs:$src3, u6_2ImmPred:$src2)))]>, + Requires<[HasV4T]>; + +// Rd=add(Ru,mpyi(Rs,#u6)) +def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3), + "$dst = add($src1, mpyi($src2, #$src3))", + [(set IntRegs:$dst, + (add IntRegs:$src1, (mul IntRegs:$src2, u6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// Rx=add(Ru,mpyi(Rx,Rs)) +def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst = add($src1, mpyi($src2, $src3))", + [(set IntRegs:$dst, + (add IntRegs:$src1, (mul IntRegs:$src2, IntRegs:$src3)))], + "$src2 = $dst">, + Requires<[HasV4T]>; + + +// Polynomial multiply words +// Rdd=pmpyw(Rs,Rt) +// Rxx^=pmpyw(Rs,Rt) + +// Vector reduce multiply word by signed half (32x16) +// Rdd=vrmpyweh(Rss,Rtt)[:<<1] +// Rdd=vrmpywoh(Rss,Rtt)[:<<1] +// Rxx+=vrmpyweh(Rss,Rtt)[:<<1] +// Rxx+=vrmpywoh(Rss,Rtt)[:<<1] + +// Multiply and use upper result +// Rd=mpy(Rs,Rt.H):<<1:sat +// Rd=mpy(Rs,Rt.L):<<1:sat +// Rd=mpy(Rs,Rt):<<1 +// Rd=mpy(Rs,Rt):<<1:sat +// Rd=mpysu(Rs,Rt) +// Rx+=mpy(Rs,Rt):<<1:sat +// Rx-=mpy(Rs,Rt):<<1:sat + +// Vector multiply bytes +// Rdd=vmpybsu(Rs,Rt) +// Rdd=vmpybu(Rs,Rt) +// Rxx+=vmpybsu(Rs,Rt) +// Rxx+=vmpybu(Rs,Rt) + +// Vector polynomial multiply halfwords +// Rdd=vpmpyh(Rs,Rt) +// Rxx^=vpmpyh(Rs,Rt) + +//===----------------------------------------------------------------------===// +// XTYPE/MPY - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// XTYPE/SHIFT + +//===----------------------------------------------------------------------===// + +// Shift by immediate and accumulate. 
+// Rx=add(#u8,asl(Rx,#U5)) +def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = add(#$src1, asl($src2, #$src3))", + [(set IntRegs:$dst, + (add (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// Rx=add(#u8,lsr(Rx,#U5)) +def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = add(#$src1, lsr($src2, #$src3))", + [(set IntRegs:$dst, + (add (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// Rx=sub(#u8,asl(Rx,#U5)) +def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = sub(#$src1, asl($src2, #$src3))", + [(set IntRegs:$dst, + (sub (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// Rx=sub(#u8,lsr(Rx,#U5)) +def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = sub(#$src1, lsr($src2, #$src3))", + [(set IntRegs:$dst, + (sub (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + + +//Shift by immediate and logical. +//Rx=and(#u8,asl(Rx,#U5)) +def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = and(#$src1, asl($src2, #$src3))", + [(set IntRegs:$dst, + (and (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +//Rx=and(#u8,lsr(Rx,#U5)) +def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = and(#$src1, lsr($src2, #$src3))", + [(set IntRegs:$dst, + (and (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +//Rx=or(#u8,asl(Rx,#U5)) +def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = or(#$src1, asl($src2, #$src3))", + [(set IntRegs:$dst, + (or (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + +//Rx=or(#u8,lsr(Rx,#U5)) +def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), + (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + "$dst = or(#$src1, lsr($src2, #$src3))", + [(set IntRegs:$dst, + (or (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + "$src2 = $dst">, + Requires<[HasV4T]>; + + +//Shift by register. +//Rd=lsl(#s6,Rt) +def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2), + "$dst = lsl(#$src1, $src2)", + [(set IntRegs:$dst, (shl s6ImmPred:$src1, IntRegs:$src2))]>, + Requires<[HasV4T]>; + + +//Shift by register and logical. 
+//Rxx^=asl(Rss,Rt) +def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + "$dst ^= asl($src2, $src3)", + [(set DoubleRegs:$dst, + (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +//Rxx^=asr(Rss,Rt) +def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + "$dst ^= asr($src2, $src3)", + [(set DoubleRegs:$dst, + (xor DoubleRegs:$src1, (sra DoubleRegs:$src2, IntRegs:$src3)))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +//Rxx^=lsl(Rss,Rt) +def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + "$dst ^= lsl($src2, $src3)", + [(set DoubleRegs:$dst, + (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))], + "$src1 = $dst">, + Requires<[HasV4T]>; + +//Rxx^=lsr(Rss,Rt) +def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + "$dst ^= lsr($src2, $src3)", + [(set DoubleRegs:$dst, + (xor DoubleRegs:$src1, (srl DoubleRegs:$src2, IntRegs:$src3)))], + "$src1 = $dst">, + Requires<[HasV4T]>; + + +//===----------------------------------------------------------------------===// +// XTYPE/SHIFT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MEMOP: Word, Half, Byte +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MEMOP: Word +// +// Implemented: +// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5 +// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5 +// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt +// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt +// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt +// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt +// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5 +// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5 +// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt +// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt +// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt +// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt +// +// Not implemented: +// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5) +// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5) +// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5) +// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5) +//===----------------------------------------------------------------------===// + + +// MEMw_ADDSUBi_indexed_V4: +// pseudo operation for MEMw_ADDi_indexed_V4 and +// MEMw_SUBi_indexed_V4 a later pass will change it +// to the corresponding pattern. 
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
+            "Error; should not emit",
+            [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                         m6ImmPred:$addend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
+            "memw($base+#$offset) += $addend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
+            "memw($base+#$offset) -= $subend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
+            "memw($base+#$offset) += $addend",
+            [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                         IntRegs:$addend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
+            "memw($base+#$offset) -= $subend",
+            [(store (sub (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                         IntRegs:$subend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
+            "memw($base+#$offset) &= $andend",
+            [(store (and (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                         IntRegs:$andend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
+            "memw($base+#$offset) |= $orend",
+            [(store (or (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                        IntRegs:$orend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// MEMw_ADDSUBi_V4:
+// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4;
+// a later pass will change it to the right pattern.
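Editorial aside, not part of the patch: the word memops above fold a load, an ALU operation, and a store of the same address into one instruction. A minimal C++ sketch of the source-level updates the patterns are meant to catch (the function and its names are invented for illustration):

    void update_word(int *p, int r) {
      p[3] += 5;   // memw(Rs+#12) += #U5
      p[3] -= 5;   // memw(Rs+#12) -= #U5
      p[3] += r;   // memw(Rs+#12) += Rt
      p[3] &= r;   // memw(Rs+#12) &= Rt
    }

Since m6Imm appears to be a signed immediate, the ADDSUBi pseudo can match both the "+= #imm" and "-= #imm" shapes with one pattern; the later pass mentioned in the comments is then expected to select the real "+= #U5" or "-= #U5" form from the sign of the immediate.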
+let AddedComplexity = 30 in +def MEMw_ADDSUBi_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, m6Imm:$addend), + "Error; should not emit", + [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend), + ADDRriU6_2:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memw(Rs+#u6:2) += #U5 +let AddedComplexity = 30 in +def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, u5Imm:$addend), + "memw($addr) += $addend", + []>, + Requires<[HasV4T, UseMEMOP]>; + +// memw(Rs+#u6:2) -= #U5 +let AddedComplexity = 30 in +def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, u5Imm:$subend), + "memw($addr) -= $subend", + []>, + Requires<[HasV4T, UseMEMOP]>; + +// memw(Rs+#u6:2) += Rt +let AddedComplexity = 30 in +def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$addend), + "memw($addr) += $addend", + [(store (add (load ADDRriU6_2:$addr), IntRegs:$addend), + ADDRriU6_2:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memw(Rs+#u6:2) -= Rt +let AddedComplexity = 30 in +def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$subend), + "memw($addr) -= $subend", + [(store (sub (load ADDRriU6_2:$addr), IntRegs:$subend), + ADDRriU6_2:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memw(Rs+#u6:2) &= Rt +let AddedComplexity = 30 in +def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$andend), + "memw($addr) &= $andend", + [(store (and (load ADDRriU6_2:$addr), IntRegs:$andend), + ADDRriU6_2:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memw(Rs+#u6:2) |= Rt +let AddedComplexity = 30 in +def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$orend), + "memw($addr) |= $orend", + [(store (or (load ADDRriU6_2:$addr), IntRegs:$orend), +ADDRriU6_2:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +//===----------------------------------------------------------------------===// +// MEMOP: Halfword +// +// Implemented: +// MEMh_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5 +// MEMh_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5 +// MEMh_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt +// MEMh_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt +// MEMh_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt +// MEMh_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt +// MEMh_ADDi_V4 : memw(Rs+#u6:2)+=#U5 +// MEMh_SUBi_V4 : memw(Rs+#u6:2)-=#U5 +// MEMh_ADDr_V4 : memw(Rs+#u6:2)+=Rt +// MEMh_SUBr_V4 : memw(Rs+#u6:2)-=Rt +// MEMh_CLRr_V4 : memw(Rs+#u6:2)&=Rt +// MEMh_SETr_V4 : memw(Rs+#u6:2)|=Rt +// +// Not implemented: +// MEMh_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5) +// MEMh_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5) +// MEMh_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5) +// MEMh_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5) +//===----------------------------------------------------------------------===// + + +// MEMh_ADDSUBi_indexed_V4: +// Pseudo operation for MEMh_ADDi_indexed_V4 and +// MEMh_SUBi_indexed_V4 a later pass will change it +// to the corresponding pattern. 
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
+            "Error; should not emit",
+            [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+                                                   u6_1ImmPred:$offset)),
+                                 m6ImmPred:$addend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
+            "memh($base+#$offset) += $addend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
+            "memh($base+#$offset) -= $subend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
+            "memh($base+#$offset) += $addend",
+            [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+                                                   u6_1ImmPred:$offset)),
+                                 IntRegs:$addend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
+            "memh($base+#$offset) -= $subend",
+            [(truncstorei16 (sub (sextloadi16 (add IntRegs:$base,
+                                                   u6_1ImmPred:$offset)),
+                                 IntRegs:$subend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
+            "memh($base+#$offset) &= $andend",
+            [(truncstorei16 (and (sextloadi16 (add IntRegs:$base,
+                                                   u6_1ImmPred:$offset)),
+                                 IntRegs:$andend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
+            "memh($base+#$offset) |= $orend",
+            [(truncstorei16 (or (sextloadi16 (add IntRegs:$base,
+                                                  u6_1ImmPred:$offset)),
+                                IntRegs:$orend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// MEMh_ADDSUBi_V4:
+// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in +def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, m6Imm:$addend), + "Error; should not emit", + [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr), + m6ImmPred:$addend), ADDRriU6_1:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memh(Rs+#u6:1) += #U5 +let AddedComplexity = 30 in +def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, u5Imm:$addend), + "memh($addr) += $addend", + []>, + Requires<[HasV4T, UseMEMOP]>; + +// memh(Rs+#u6:1) -= #U5 +let AddedComplexity = 30 in +def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, u5Imm:$subend), + "memh($addr) -= $subend", + []>, + Requires<[HasV4T, UseMEMOP]>; + +// memh(Rs+#u6:1) += Rt +let AddedComplexity = 30 in +def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$addend), + "memh($addr) += $addend", + [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr), + IntRegs:$addend), ADDRriU6_1:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memh(Rs+#u6:1) -= Rt +let AddedComplexity = 30 in +def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$subend), + "memh($addr) -= $subend", + [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr), + IntRegs:$subend), ADDRriU6_1:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memh(Rs+#u6:1) &= Rt +let AddedComplexity = 30 in +def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$andend), + "memh($addr) &= $andend", + [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr), + IntRegs:$andend), ADDRriU6_1:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memh(Rs+#u6:1) |= Rt +let AddedComplexity = 30 in +def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$orend), + "memh($addr) |= $orend", + [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr), + IntRegs:$orend), ADDRriU6_1:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + + +//===----------------------------------------------------------------------===// +// MEMOP: Byte +// +// Implemented: +// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5 +// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5 +// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt +// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt +// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt +// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt +// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5 +// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5 +// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt +// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt +// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt +// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt +// +// Not implemented: +// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5) +// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5) +// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5) +// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5) +//===----------------------------------------------------------------------===// + + +// MEMb_ADDSUBi_indexed_V4: +// Pseudo operation for MEMb_ADDi_indexed_V4 and +// MEMb_SUBi_indexed_V4 a later pass will change it +// to the corresponding pattern. 
+let AddedComplexity = 30 in +def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs), + (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend), + "Error; should not emit", + [(truncstorei8 (add (sextloadi8 (add IntRegs:$base, + u6_0ImmPred:$offset)), + m6ImmPred:$addend), + (add IntRegs:$base, u6_0ImmPred:$offset))]>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) += #U5 +let AddedComplexity = 30 in +def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs), + (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend), + "memb($base+#$offset) += $addend", + []>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) -= #U5 +let AddedComplexity = 30 in +def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs), + (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend), + "memb($base+#$offset) -= $subend", + []>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) += Rt +let AddedComplexity = 30 in +def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), + (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend), + "memb($base+#$offset) += $addend", + [(truncstorei8 (add (sextloadi8 (add IntRegs:$base, + u6_0ImmPred:$offset)), + IntRegs:$addend), + (add IntRegs:$base, u6_0ImmPred:$offset))]>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) -= Rt +let AddedComplexity = 30 in +def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), + (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend), + "memb($base+#$offset) -= $subend", + [(truncstorei8 (sub (sextloadi8 (add IntRegs:$base, + u6_0ImmPred:$offset)), + IntRegs:$subend), + (add IntRegs:$base, u6_0ImmPred:$offset))]>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) &= Rt +let AddedComplexity = 30 in +def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), + (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend), + "memb($base+#$offset) &= $andend", + [(truncstorei8 (and (sextloadi8 (add IntRegs:$base, + u6_0ImmPred:$offset)), + IntRegs:$andend), + (add IntRegs:$base, u6_0ImmPred:$offset))]>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) |= Rt +let AddedComplexity = 30 in +def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), + (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend), + "memb($base+#$offset) |= $orend", + [(truncstorei8 (or (sextloadi8 (add IntRegs:$base, + u6_0ImmPred:$offset)), + IntRegs:$orend), + (add IntRegs:$base, u6_0ImmPred:$offset))]>, + Requires<[HasV4T, UseMEMOP]>; + +// MEMb_ADDSUBi_V4: +// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4; +// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in +def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, m6Imm:$addend), + "Error; should not emit", + [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr), + m6ImmPred:$addend), ADDRriU6_0:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) += #U5 +let AddedComplexity = 30 in +def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, u5Imm:$addend), + "memb($addr) += $addend", + []>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) -= #U5 +let AddedComplexity = 30 in +def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, u5Imm:$subend), + "memb($addr) -= $subend", + []>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) += Rt +let AddedComplexity = 30 in +def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$addend), + "memb($addr) += $addend", + [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr), + IntRegs:$addend), ADDRriU6_0:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) -= Rt +let AddedComplexity = 30 in +def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$subend), + "memb($addr) -= $subend", + [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr), + IntRegs:$subend), ADDRriU6_0:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) &= Rt +let AddedComplexity = 30 in +def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$andend), + "memb($addr) &= $andend", + [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr), + IntRegs:$andend), ADDRriU6_0:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + +// memb(Rs+#u6:0) |= Rt +let AddedComplexity = 30 in +def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$orend), + "memb($addr) |= $orend", + [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr), + IntRegs:$orend), ADDRriU6_0:$addr)]>, + Requires<[HasV4T, UseMEMOP]>; + + +//===----------------------------------------------------------------------===// +// MEMOP: Byte + +// +// Implemented: +// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5 +// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5 +// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt +// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt +// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt +// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt +// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5 +// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5 +// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt +// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt +// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt +// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt +// +// Not implemented: +// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5) +// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5) +// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5) +// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5) +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/PRED + +//===----------------------------------------------------------------------===// + +// Hexagon V4 only supports these flavors of byte/half compare instructions: +// EQ/GT/GTU. Other flavors such as GE/GEU/LT/LTU/LE/LEU are not supported by +// the hardware. However, the compiler can still synthesize them by combining +// the patterns that are implemented. +// The implemented patterns are: EQ/GT/GTU. +// Missing patterns are: GE/GEU/LT/LTU/LE/LEU.
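Editorial note, not part of the original patch: one way to see how the missing flavors fall out of the implemented ones is that a signed byte "less than" is just the GT compare with its operands swapped (a < b exactly when b > a), while GE/LE/NE additionally need the resulting predicate inverted. A minimal TableGen sketch under that assumption, reusing the CMPbGTrr_V4 definition below (the i1 result type and the placement of the Pat are illustrative, not taken from this patch):

// Illustration only: select a signed byte "less than" by swapping the
// operands of the cmpb.gt pattern.
def : Pat<(i1 (setlt (shl IntRegs:$src1, (i32 24)),
                     (shl IntRegs:$src2, (i32 24)))),
          (CMPbGTrr_V4 IntRegs:$src2, IntRegs:$src1)>;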
+ +// Pd=cmpb.eq(Rs,#u8) +let isCompare = 1 in +def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u8Imm:$src2), + "$dst = cmpb.eq($src1, #$src2)", + [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 255), + u8ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.eq(Rs,Rt) +let isCompare = 1 in +def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.eq($src1, $src2)", + [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1, + IntRegs:$src2), + 255), + 0))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.eq(Rs,Rt) +let isCompare = 1 in +def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.eq($src1, $src2)", + [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 24)), + (shl IntRegs:$src2, (i32 24))))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gt(Rs,#s8) +let isCompare = 1 in +def CMPbGTri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, s32Imm:$src2), + "$dst = cmpb.gt($src1, #$src2)", + [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)), + s32_24ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gt(Rs,Rt) +let isCompare = 1 in +def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.gt($src1, $src2)", + [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)), + (shl IntRegs:$src2, (i32 24))))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gtu(Rs,#u7) +let isCompare = 1 in +def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u7Imm:$src2), + "$dst = cmpb.gtu($src1, #$src2)", + [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255), + u7ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gtu(Rs,Rt) +let isCompare = 1 in +def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.gtu($src1, $src2)", + [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255), + (and IntRegs:$src2, 255)))]>, + Requires<[HasV4T]>; + +// Signed half compare(.eq) ri. +// Pd=cmph.eq(Rs,#s8) +let isCompare = 1 in +def CMPhEQri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u16Imm:$src2), + "$dst = cmph.eq($src1, #$src2)", + [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 65535), + u16_s8ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Signed half compare(.eq) rr. +// Case 1: xor + and, then compare: +// r0=xor(r0,r1) +// r0=and(r0,#0xffff) +// p0=cmp.eq(r0,#0) +// Pd=cmph.eq(Rs,Rt) +let isCompare = 1 in +def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.eq($src1, $src2)", + [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1, + IntRegs:$src2), + 65535), + 0))]>, + Requires<[HasV4T]>; + +// Signed half compare(.eq) rr. +// Case 2: shift left 16 bits then compare: +// r0=asl(r0,16) +// r1=asl(r1,16) +// p0=cmp.eq(r0,r1) +// Pd=cmph.eq(Rs,Rt) +let isCompare = 1 in +def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.eq($src1, $src2)", + [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 16)), + (shl IntRegs:$src2, (i32 16))))]>, + Requires<[HasV4T]>; + +// Signed half compare(.gt) ri. +// Pd=cmph.gt(Rs,#s8) +let isCompare = 1 in +def CMPhGTri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, s32Imm:$src2), + "$dst = cmph.gt($src1, #$src2)", + [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)), + s32_16s8ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Signed half compare(.gt) rr. 
+// Pd=cmph.gt(Rs,Rt) +let isCompare = 1 in +def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.gt($src1, $src2)", + [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)), + (shl IntRegs:$src2, (i32 16))))]>, + Requires<[HasV4T]>; + +// Unsigned half compare rr (.gtu). +// Pd=cmph.gtu(Rs,Rt) +let isCompare = 1 in +def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.gtu($src1, $src2)", + [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535), + (and IntRegs:$src2, 65535)))]>, + Requires<[HasV4T]>; + +// Unsigned half compare ri (.gtu). +// Pd=cmph.gtu(Rs,#u7) +let isCompare = 1 in +def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u7Imm:$src2), + "$dst = cmph.gtu($src1, #$src2)", + [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535), + u7ImmPred:$src2))]>, + Requires<[HasV4T]>; + +//===----------------------------------------------------------------------===// +// XTYPE/PRED - +//===----------------------------------------------------------------------===// + +// Deallocate frame and return. +// dealloc_return +let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1, + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + def DEALLOC_RET_V4 : NVInst_V4<(outs), (ins i32imm:$amt1), + "dealloc_return", + []>, + Requires<[HasV4T]>; +} + +// if (Ps) dealloc_return +let isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1), + "if ($src1) dealloc_return", + []>, + Requires<[HasV4T]>; +} + +// if (!Ps) dealloc_return +let isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, + i32imm:$amt1), + "if (!$src1) dealloc_return", + []>, + Requires<[HasV4T]>; +} + +// if (Ps.new) dealloc_return:nt +let isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, + i32imm:$amt1), + "if ($src1.new) dealloc_return:nt", + []>, + Requires<[HasV4T]>; +} + +// if (!Ps.new) dealloc_return:nt +let isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, + i32imm:$amt1), + "if (!$src1.new) dealloc_return:nt", + []>, + Requires<[HasV4T]>; +} + +// if (Ps.new) dealloc_return:t +let isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, + i32imm:$amt1), + "if ($src1.new) dealloc_return:t", + []>, + Requires<[HasV4T]>; +} + +// if (!Ps.new) dealloc_return:t +let isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, + i32imm:$amt1), + "if (!$src1.new) dealloc_return:t", + []>, + Requires<[HasV4T]>; +} diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td new file mode 100644 index 0000000..1328eba --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -0,0 +1,3462 @@ +//===- HexagonIntrinsics.td - Instruction intrinsics -------*- tablegen -*-===// +// +// The LLVM Compiler
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V2 Architecture +// Application-Level Specification +// 80-V9418-8 Rev. B +// March 4, 2008 +//===----------------------------------------------------------------------===// + +// +// ALU 32 types. +// + +class qi_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_ALU32_sis10<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_ALU32_siu8<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_ALU32_siu9<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_qisisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_ALU32_qis8si<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, + IntRegs:$src3))]>; + +class si_ALU32_qisis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + s8Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_ALU32_qis8s8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; + +class si_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU32_sisi_sat<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU32_sisi_rnd<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), + [(set IntRegs:$dst, 
(IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU32_sis16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_sis10<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_s10si<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), + [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; + +class si_lo_ALU32_siu16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), + !strconcat("$dst.l = ", !strconcat(opc , "#$src2")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_hi_ALU32_siu16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), + !strconcat("$dst.h = ", !strconcat(opc , "#$src2")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_s16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1")), + [(set IntRegs:$dst, (IntID imm:$src1))]>; + +class di_ALU32_s8<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1")), + [(set DoubleRegs:$dst, (IntID imm:$src1))]>; + +class di_ALU64_di<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; + +class si_ALU32_si<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_ALU32_si_tfr<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +// +// ALU 64 types. 
+// + +class si_ALU64_si_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_ALU64_didi<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class di_ALU64_sidi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; + +class di_ALU64_didi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_qididi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$src3))]>; + +class di_ALU64_sisi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_ALU64_didi_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_rnd<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_crnd<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class qi_ALU64_didi<string opc, Intrinsic IntID> + : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_ALU64_sisi<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins 
IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")), + [(set 
IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +// +// SInst classes. +// + +class qi_SInst_qi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set PredRegs:$dst, (IntID IntRegs:$src))]>; + +class qi_SInst_qi_pxfer<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set PredRegs:$dst, (IntID IntRegs:$src))]>; + +class qi_SInst_qiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_qiqi_neg<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_SInst_di<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; + +class di_SInst_di_sat<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; + +class si_SInst_di<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; + +class si_SInst_di_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; + +class di_SInst_disi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc 
, "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class di_SInst_didi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class di_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; + +class si_SInst_sisiu3<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_SInst_diu5<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_SInst_disi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class si_SInst_sidi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; + +class di_SInst_disisi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class di_SInst_sisi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_siu5<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_SInst_siu6<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_SInst_sisi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_si_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class di_SInst_qi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_qi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), 
(ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_qiqi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set PredRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_sisi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_SInst_diu6<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_SInst_siu5<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_SInst_siu5_rnd<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_SInst_siu5u5<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; + +class si_SInst_sisisi_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisisi_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didisi_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didisi_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, IntRegs:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5u5<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2, u5Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2, imm:$src3))], + "$dst2 = $dst">; + +class si_SInst_sisidi<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + DoubleRegs:$src2), + 
!strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6u6<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2, u6Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, #$src2, #$src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2, imm:$src3))], + "$dst2 = $dst">; + +class di_SInst_dididi<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_diu6u6<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2, + u6Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2, + imm:$src3))]>; + +class di_SInst_didisi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3))]>; + +class di_SInst_didiqi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3))]>; + +class di_SInst_didiu3<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + u3Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, + imm:$src3))]>; + +class di_SInst_didisi_or<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didisi_and<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_and<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_or<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_xor<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), + [(set 
DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisisi_and<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisisi_or<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + + +class si_SInst_sisiu5_and<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_or<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_xor<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u5Imm:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u5Imm:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + + +// +// MInst classes. 
+// + +class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_disisi_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), 
(ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2*):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2*):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_s8s8<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>; + +class si_MInst_sisi<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_lh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hl<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), + 
[(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_ll<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + + +class si_MInst_sisi_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_up<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didi<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_didi_conj<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", 
!strconcat(opc , + "($src1, $src2*):<<1:sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_didi_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class si_SInst_sisi_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2.L):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2.H):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2*):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2*):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisisi_xacc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst += ", 
!strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisis8_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + s8Imm:$src3), + !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisis8_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + s8Imm:$src3), + !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisiu4u5<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u4Imm:$src2, u5Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2, imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisiu8_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + u8Imm:$src3), + !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisiu8_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + u8Imm:$src3), + !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, 
IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID 
IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], 
+ "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + 
IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didi_s1_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID 
IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , + "($src1.H, $src2.H):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> + 
: MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, + DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs 
DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + 
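A note on the naming scheme of these wrapper classes, inferred from the definitions above (a reading aid, not part of the patch): the leading si_/di_ encodes the result register class (IntRegs, 32-bit, vs. DoubleRegs, 64-bit pairs), the MInst/ALU32/ALU64/SInst part names the instruction format, the sisi/didi/disisi part lists the source operand classes, and the trailing tags map onto assembly modifiers: acc/nac/xacc for the += / -= / ^= accumulating forms (accumulator tied via "$dst2 = $dst"), hh/hl/lh/ll for .H/.L half-word selection, s1 for :<<1, rnd for :rnd, sat for :sat, and conj for the conjugated ($src2*) operand. A minimal sketch of how one of these classes is instantiated further down in this file, in the MTYPE/MPYS defs:

// Instantiation taken from the MPYS section below; di_MInst_disisi_acc_hh is
// the class declared just above.  TableGen expands it to an MInst_acc whose
// assembly string is "$dst += mpy($src1.H, $src2.H)" and whose pattern is
//   [(set DoubleRegs:$dst,
//         (int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$dst2,
//                                        IntRegs:$src1, IntRegs:$src2))]
// with the accumulator input tied to the result by "$dst2 = $dst".
def Hexagon_M2_mpyd_acc_hh_s0:
  di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;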
+class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disi_s1_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class 
si_MInst_didi<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + + +/******************************************************************** +* ALU32/ALU * +*********************************************************************/ + +// ALU32 / ALU / Add. +def Hexagon_A2_add: + si_ALU32_sisi <"add", int_hexagon_A2_add>; +def Hexagon_A2_addi: + si_ALU32_sis16 <"add", int_hexagon_A2_addi>; + +// ALU32 / ALU / Logical operations. +def Hexagon_A2_and: + si_ALU32_sisi <"and", int_hexagon_A2_and>; +def Hexagon_A2_andir: + si_ALU32_sis10 <"and", int_hexagon_A2_andir>; +def Hexagon_A2_not: + si_ALU32_si <"not", int_hexagon_A2_not>; +def Hexagon_A2_or: + si_ALU32_sisi <"or", int_hexagon_A2_or>; +def Hexagon_A2_orir: + si_ALU32_sis10 <"or", int_hexagon_A2_orir>; +def Hexagon_A2_xor: + si_ALU32_sisi <"xor", int_hexagon_A2_xor>; + +// ALU32 / ALU / Negate. +def Hexagon_A2_neg: + si_ALU32_si <"neg", int_hexagon_A2_neg>; + +// ALU32 / ALU / Subtract. +def Hexagon_A2_sub: + si_ALU32_sisi <"sub", int_hexagon_A2_sub>; +def Hexagon_A2_subri: + si_ALU32_s10si <"sub", int_hexagon_A2_subri>; + +// ALU32 / ALU / Transfer Immediate. +def Hexagon_A2_tfril: + si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>; +def Hexagon_A2_tfrih: + si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>; +def Hexagon_A2_tfrsi: + si_ALU32_s16 <"", int_hexagon_A2_tfrsi>; +def Hexagon_A2_tfrpi: + di_ALU32_s8 <"", int_hexagon_A2_tfrpi>; + +// ALU32 / ALU / Transfer Register. +def Hexagon_A2_tfr: + si_ALU32_si_tfr <"", int_hexagon_A2_tfr>; + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ + +// ALU32 / PERM / Combine. +def Hexagon_A2_combinew: + di_ALU32_sisi <"combine", int_hexagon_A2_combinew>; +def Hexagon_A2_combine_hh: + si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>; +def Hexagon_A2_combine_lh: + si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>; +def Hexagon_A2_combine_hl: + si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>; +def Hexagon_A2_combine_ll: + si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>; +def Hexagon_A2_combineii: + di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>; + +// ALU32 / PERM / Mux. +def Hexagon_C2_mux: + si_ALU32_qisisi <"mux", int_hexagon_C2_mux>; +def Hexagon_C2_muxri: + si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>; +def Hexagon_C2_muxir: + si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>; +def Hexagon_C2_muxii: + si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>; + +// ALU32 / PERM / Shift halfword. +def Hexagon_A2_aslh: + si_ALU32_si <"aslh", int_hexagon_A2_aslh>; +def Hexagon_A2_asrh: + si_ALU32_si <"asrh", int_hexagon_A2_asrh>; +def SI_to_SXTHI_asrh: + si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>; + +// ALU32 / PERM / Sign/zero extend. +def Hexagon_A2_sxth: + si_ALU32_si <"sxth", int_hexagon_A2_sxth>; +def Hexagon_A2_sxtb: + si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>; +def Hexagon_A2_zxth: + si_ALU32_si <"zxth", int_hexagon_A2_zxth>; +def Hexagon_A2_zxtb: + si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>; + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ + +// ALU32 / PRED / Compare. 
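The compare defs that follow produce a predicate result; the qi_ALU32_* classes they use are declared earlier in this file and are not visible in this hunk, so the expansion sketched here is an assumption by analogy with the si_/di_ classes above:

// Taken from the list below; assuming qi_ALU32_sisi follows the same pattern
// as the classes above, this should expand to an ALU32-class instruction with
// the assembly string "$dst = cmp.gt($src1, $src2)", a predicate-register
// destination, and a pattern matching int_hexagon_C2_cmpgt on two IntRegs.
def Hexagon_C2_cmpgt:
  qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;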
+def Hexagon_C2_cmpeq: + qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>; +def Hexagon_C2_cmpeqi: + qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>; +def Hexagon_C2_cmpgei: + qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>; +def Hexagon_C2_cmpgeui: + qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>; +def Hexagon_C2_cmpgt: + qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>; +def Hexagon_C2_cmpgti: + qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>; +def Hexagon_C2_cmpgtu: + qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>; +def Hexagon_C2_cmpgtui: + qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>; +def Hexagon_C2_cmplt: + qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>; +def Hexagon_C2_cmpltu: + qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>; + +/******************************************************************** +* ALU32/VH * +*********************************************************************/ + +// ALU32 / VH / Vector add halfwords. +// Rd32=vadd[u]h(Rs32,Rt32:sat] +def Hexagon_A2_svaddh: + si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>; +def Hexagon_A2_svaddhs: + si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>; +def Hexagon_A2_svadduhs: + si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>; + +// ALU32 / VH / Vector average halfwords. +def Hexagon_A2_svavgh: + si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>; +def Hexagon_A2_svavghs: + si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>; +def Hexagon_A2_svnavgh: + si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>; + +// ALU32 / VH / Vector subtract halfwords. +def Hexagon_A2_svsubh: + si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>; +def Hexagon_A2_svsubhs: + si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>; +def Hexagon_A2_svsubuhs: + si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>; + +/******************************************************************** +* ALU64/ALU * +*********************************************************************/ + +// ALU64 / ALU / Add. +def Hexagon_A2_addp: + di_ALU64_didi <"add", int_hexagon_A2_addp>; +def Hexagon_A2_addsat: + si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>; + +// ALU64 / ALU / Add halfword. +// Even though the definition says hl, it should be lh - +//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits. +def Hexagon_A2_addh_l16_hl: + si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>; +def Hexagon_A2_addh_l16_ll: + si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>; + +def Hexagon_A2_addh_l16_sat_hl: + si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>; +def Hexagon_A2_addh_l16_sat_ll: + si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>; + +def Hexagon_A2_addh_h16_hh: + si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>; +def Hexagon_A2_addh_h16_hl: + si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>; +def Hexagon_A2_addh_h16_lh: + si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>; +def Hexagon_A2_addh_h16_ll: + si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>; + +def Hexagon_A2_addh_h16_sat_hh: + si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>; +def Hexagon_A2_addh_h16_sat_hl: + si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>; +def Hexagon_A2_addh_h16_sat_lh: + si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>; +def Hexagon_A2_addh_h16_sat_ll: + si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>; + +// ALU64 / ALU / Compare. 
+def Hexagon_C2_cmpeqp: + qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>; +def Hexagon_C2_cmpgtp: + qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>; +def Hexagon_C2_cmpgtup: + qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>; + +// ALU64 / ALU / Logical operations. +def Hexagon_A2_andp: + di_ALU64_didi <"and", int_hexagon_A2_andp>; +def Hexagon_A2_orp: + di_ALU64_didi <"or", int_hexagon_A2_orp>; +def Hexagon_A2_xorp: + di_ALU64_didi <"xor", int_hexagon_A2_xorp>; + +// ALU64 / ALU / Maximum. +def Hexagon_A2_max: + si_ALU64_sisi <"max", int_hexagon_A2_max>; +def Hexagon_A2_maxu: + si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>; + +// ALU64 / ALU / Minimum. +def Hexagon_A2_min: + si_ALU64_sisi <"min", int_hexagon_A2_min>; +def Hexagon_A2_minu: + si_ALU64_sisi <"minu", int_hexagon_A2_minu>; + +// ALU64 / ALU / Subtract. +def Hexagon_A2_subp: + di_ALU64_didi <"sub", int_hexagon_A2_subp>; +def Hexagon_A2_subsat: + si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>; + +// ALU64 / ALU / Subtract halfword. +// Even though the definition says hl, it should be lh - +//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits. +def Hexagon_A2_subh_l16_hl: + si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>; +def Hexagon_A2_subh_l16_ll: + si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>; + +def Hexagon_A2_subh_l16_sat_hl: + si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>; +def Hexagon_A2_subh_l16_sat_ll: + si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>; + +def Hexagon_A2_subh_h16_hh: + si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>; +def Hexagon_A2_subh_h16_hl: + si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>; +def Hexagon_A2_subh_h16_lh: + si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>; +def Hexagon_A2_subh_h16_ll: + si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>; + +def Hexagon_A2_subh_h16_sat_hh: + si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>; +def Hexagon_A2_subh_h16_sat_hl: + si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>; +def Hexagon_A2_subh_h16_sat_lh: + si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>; +def Hexagon_A2_subh_h16_sat_ll: + si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>; + +// ALU64 / ALU / Transfer register. +def Hexagon_A2_tfrp: + di_ALU64_di <"", int_hexagon_A2_tfrp>; + +/******************************************************************** +* ALU64/BIT * +*********************************************************************/ + +// ALU64 / BIT / Masked parity. +def Hexagon_S2_parityp: + si_ALU64_didi <"parity", int_hexagon_S2_parityp>; + +/******************************************************************** +* ALU64/PERM * +*********************************************************************/ + +// ALU64 / PERM / Vector pack high and low halfwords. +def Hexagon_S2_packhl: + di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>; + +/******************************************************************** +* ALU64/VB * +*********************************************************************/ + +// ALU64 / VB / Vector add unsigned bytes. +def Hexagon_A2_vaddub: + di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>; +def Hexagon_A2_vaddubs: + di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>; + +// ALU64 / VB / Vector average unsigned bytes. 
+def Hexagon_A2_vavgub: + di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>; +def Hexagon_A2_vavgubr: + di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>; + +// ALU64 / VB / Vector compare unsigned bytes. +def Hexagon_A2_vcmpbeq: + qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>; +def Hexagon_A2_vcmpbgtu: + qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>; + +// ALU64 / VB / Vector maximum/minimum unsigned bytes. +def Hexagon_A2_vmaxub: + di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>; +def Hexagon_A2_vminub: + di_ALU64_didi <"vminub", int_hexagon_A2_vminub>; + +// ALU64 / VB / Vector subtract unsigned bytes. +def Hexagon_A2_vsubub: + di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>; +def Hexagon_A2_vsububs: + di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>; + +// ALU64 / VB / Vector mux. +def Hexagon_C2_vmux: + di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>; + + +/******************************************************************** +* ALU64/VH * +*********************************************************************/ + +// ALU64 / VH / Vector add halfwords. +// Rdd64=vadd[u]h(Rss64,Rtt64:sat] +def Hexagon_A2_vaddh: + di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>; +def Hexagon_A2_vaddhs: + di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>; +def Hexagon_A2_vadduhs: + di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>; + +// ALU64 / VH / Vector average halfwords. +// Rdd64=v[n]avg[u]h(Rss64,Rtt64:rnd/:crnd][:sat] +def Hexagon_A2_vavgh: + di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>; +def Hexagon_A2_vavghcr: + di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>; +def Hexagon_A2_vavghr: + di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>; +def Hexagon_A2_vavguh: + di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>; +def Hexagon_A2_vavguhr: + di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>; +def Hexagon_A2_vnavgh: + di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>; +def Hexagon_A2_vnavghcr: + di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>; +def Hexagon_A2_vnavghr: + di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>; + +// ALU64 / VH / Vector compare halfwords. +def Hexagon_A2_vcmpheq: + qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>; +def Hexagon_A2_vcmphgt: + qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>; +def Hexagon_A2_vcmphgtu: + qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>; + +// ALU64 / VH / Vector maximum halfwords. +def Hexagon_A2_vmaxh: + di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>; +def Hexagon_A2_vmaxuh: + di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>; + +// ALU64 / VH / Vector minimum halfwords. +def Hexagon_A2_vminh: + di_ALU64_didi <"vminh", int_hexagon_A2_vminh>; +def Hexagon_A2_vminuh: + di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>; + +// ALU64 / VH / Vector subtract halfwords. +def Hexagon_A2_vsubh: + di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>; +def Hexagon_A2_vsubhs: + di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>; +def Hexagon_A2_vsubuhs: + di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>; + + +/******************************************************************** +* ALU64/VW * +*********************************************************************/ + +// ALU64 / VW / Vector add words. +// Rdd32=vaddw(Rss32,Rtt32)[:sat] +def Hexagon_A2_vaddw: + di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>; +def Hexagon_A2_vaddws: + di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>; + +// ALU64 / VW / Vector average words. 
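In the vector-average groups here and below, the _r and _cr def suffixes pick the _rnd and _crnd(_sat) class variants, selecting the :rnd and :crnd (convergent round) forms; those di_ALU64_didi_* classes are declared earlier in the file, so the expansion below is an assumption based on the modifier convention of the MInst classes above:

// From the ALU64/VW group that follows; assuming di_ALU64_didi_rnd appends
// ":rnd" the same way the *_rnd MInst classes above do, this expands to
// "$dst = vavgw($src1, $src2):rnd" operating on 64-bit register pairs.
def Hexagon_A2_vavgwr:
  di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;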
+def Hexagon_A2_vavguw: + di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>; +def Hexagon_A2_vavguwr: + di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>; +def Hexagon_A2_vavgw: + di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>; +def Hexagon_A2_vavgwcr: + di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>; +def Hexagon_A2_vavgwr: + di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>; +def Hexagon_A2_vnavgw: + di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>; +def Hexagon_A2_vnavgwcr: + di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>; +def Hexagon_A2_vnavgwr: + di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>; + +// ALU64 / VW / Vector compare words. +def Hexagon_A2_vcmpweq: + qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>; +def Hexagon_A2_vcmpwgt: + qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>; +def Hexagon_A2_vcmpwgtu: + qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>; + +// ALU64 / VW / Vector maximum words. +def Hexagon_A2_vmaxw: + di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>; +def Hexagon_A2_vmaxuw: + di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>; + +// ALU64 / VW / Vector minimum words. +def Hexagon_A2_vminw: + di_ALU64_didi <"vminw", int_hexagon_A2_vminw>; +def Hexagon_A2_vminuw: + di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>; + +// ALU64 / VW / Vector subtract words. +def Hexagon_A2_vsubw: + di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>; +def Hexagon_A2_vsubws: + di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>; + + +/******************************************************************** +* CR * +*********************************************************************/ + +// CR / Logical reductions on predicates. +def Hexagon_C2_all8: + qi_SInst_qi <"all8", int_hexagon_C2_all8>; +def Hexagon_C2_any8: + qi_SInst_qi <"any8", int_hexagon_C2_any8>; + +// CR / Logical operations on predicates. +def Hexagon_C2_pxfer_map: + qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>; +def Hexagon_C2_and: + qi_SInst_qiqi <"and", int_hexagon_C2_and>; +def Hexagon_C2_andn: + qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>; +def Hexagon_C2_not: + qi_SInst_qi <"not", int_hexagon_C2_not>; +def Hexagon_C2_or: + qi_SInst_qiqi <"or", int_hexagon_C2_or>; +def Hexagon_C2_orn: + qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>; +def Hexagon_C2_xor: + qi_SInst_qiqi <"xor", int_hexagon_C2_xor>; + + +/******************************************************************** +* MTYPE/ALU * +*********************************************************************/ + +// MTYPE / ALU / Add and accumulate. +def Hexagon_M2_acci: + si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>; +def Hexagon_M2_accii: + si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>; +def Hexagon_M2_nacci: + si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>; +def Hexagon_M2_naccii: + si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>; + +// MTYPE / ALU / Subtract and accumulate. +def Hexagon_M2_subacc: + si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>; + +// MTYPE / ALU / Vector absolute difference. +def Hexagon_M2_vabsdiffh: + di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>; +def Hexagon_M2_vabsdiffw: + di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>; + +// MTYPE / ALU / XOR and xor with destination. +def Hexagon_M2_xor_xacc: + si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>; + + +/******************************************************************** +* MTYPE/COMPLEX * +*********************************************************************/ + +// MTYPE / COMPLEX / Complex multiply. 
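The complex-multiply defs below are direct instantiations of the di_MInst_sisi_* classes declared above; one fully visible example of how class and def line up (a reading of the patch, not an addition to it):

// From the list that follows.  di_MInst_sisi_s1_sat (declared above) gives a
// 64-bit DoubleRegs result from two IntRegs sources, the assembly string
// "$dst = cmpy($src1, $src2):<<1:sat", and the pattern
//   [(set DoubleRegs:$dst,
//         (int_hexagon_M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2))]
def Hexagon_M2_cmpys_s1:
  di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;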
+// Rdd[-+]=cmpy(Rs, Rt:<<1]:sat +def Hexagon_M2_cmpys_s1: + di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>; +def Hexagon_M2_cmpys_s0: + di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>; +def Hexagon_M2_cmpysc_s1: + di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>; +def Hexagon_M2_cmpysc_s0: + di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>; + +def Hexagon_M2_cmacs_s1: + di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>; +def Hexagon_M2_cmacs_s0: + di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>; +def Hexagon_M2_cmacsc_s1: + di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>; +def Hexagon_M2_cmacsc_s0: + di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>; + +def Hexagon_M2_cnacs_s1: + di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>; +def Hexagon_M2_cnacs_s0: + di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>; +def Hexagon_M2_cnacsc_s1: + di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>; +def Hexagon_M2_cnacsc_s0: + di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>; + +// MTYPE / COMPLEX / Complex multiply real or imaginary. +def Hexagon_M2_cmpyr_s0: + di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>; +def Hexagon_M2_cmacr_s0: + di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>; + +def Hexagon_M2_cmpyi_s0: + di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>; +def Hexagon_M2_cmaci_s0: + di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>; + +// MTYPE / COMPLEX / Complex multiply with round and pack. +// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat +def Hexagon_M2_cmpyrs_s0: + si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>; +def Hexagon_M2_cmpyrs_s1: + si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>; + +def Hexagon_M2_cmpyrsc_s0: + si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>; +def Hexagon_M2_cmpyrsc_s1: + si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>; + +//MTYPE / COMPLEX / Vector complex multiply real or imaginary. +def Hexagon_M2_vcmpy_s0_sat_i: + di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>; +def Hexagon_M2_vcmpy_s1_sat_i: + di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>; + +def Hexagon_M2_vcmpy_s0_sat_r: + di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>; +def Hexagon_M2_vcmpy_s1_sat_r: + di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>; + +def Hexagon_M2_vcmac_s0_sat_i: + di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>; +def Hexagon_M2_vcmac_s0_sat_r: + di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>; + +//MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary. 
+def Hexagon_M2_vrcmpyi_s0: + di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>; +def Hexagon_M2_vrcmpyr_s0: + di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>; + +def Hexagon_M2_vrcmpyi_s0c: + di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>; +def Hexagon_M2_vrcmpyr_s0c: + di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>; + +def Hexagon_M2_vrcmaci_s0: + di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>; +def Hexagon_M2_vrcmacr_s0: + di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>; + +def Hexagon_M2_vrcmaci_s0c: + di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>; +def Hexagon_M2_vrcmacr_s0c: + di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>; + + +/******************************************************************** +* MTYPE/MPYH * +*********************************************************************/ + +// MTYPE / MPYH / Multiply and use lower result. +//def Hexagon_M2_mpysmi: +// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>; +def Hexagon_M2_mpyi: + si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>; +def Hexagon_M2_mpyui: + si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>; +def Hexagon_M2_macsip: + si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>; +def Hexagon_M2_maci: + si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>; +def Hexagon_M2_macsin: + si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>; + +// MTYPE / MPYH / Multiply word by half (32x16). +//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat] +//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat] +def Hexagon_M2_mmpyl_rs1: + di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>; +def Hexagon_M2_mmpyl_s1: + di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>; +def Hexagon_M2_mmpyl_rs0: + di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>; +def Hexagon_M2_mmpyl_s0: + di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>; +def Hexagon_M2_mmpyh_rs1: + di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>; +def Hexagon_M2_mmpyh_s1: + di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>; +def Hexagon_M2_mmpyh_rs0: + di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>; +def Hexagon_M2_mmpyh_s0: + di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>; +def Hexagon_M2_mmacls_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>; +def Hexagon_M2_mmacls_s1: + di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>; +def Hexagon_M2_mmacls_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>; +def Hexagon_M2_mmacls_s0: + di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>; +def Hexagon_M2_mmachs_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>; +def Hexagon_M2_mmachs_s1: + di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>; +def Hexagon_M2_mmachs_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>; +def Hexagon_M2_mmachs_s0: + di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>; + +// MTYPE / MPYH / Multiply word by unsigned half (32x16). 
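In the vmpyweh/vmpywoh groups above and the unsigned variants below, the def suffix encodes the modifier combination, readable from which di_MInst_didi_* class each def picks: _s0 for :sat, _s1 for :<<1:sat, _rs0 for :rnd:sat, _rs1 for :<<1:rnd:sat. For example:

// From the group above; di_MInst_didi_s1_rnd_sat (declared at the top of this
// hunk) yields "$dst = vmpyweh($src1, $src2):<<1:rnd:sat" with DoubleRegs for
// both sources and the destination, matching int_hexagon_M2_mmpyl_rs1.
def Hexagon_M2_mmpyl_rs1:
  di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;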
+//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat] +//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat] +def Hexagon_M2_mmpyul_rs1: + di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>; +def Hexagon_M2_mmpyul_s1: + di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>; +def Hexagon_M2_mmpyul_rs0: + di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>; +def Hexagon_M2_mmpyul_s0: + di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>; +def Hexagon_M2_mmpyuh_rs1: + di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>; +def Hexagon_M2_mmpyuh_s1: + di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>; +def Hexagon_M2_mmpyuh_rs0: + di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>; +def Hexagon_M2_mmpyuh_s0: + di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>; +def Hexagon_M2_mmaculs_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>; +def Hexagon_M2_mmaculs_s1: + di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>; +def Hexagon_M2_mmaculs_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>; +def Hexagon_M2_mmaculs_s0: + di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>; +def Hexagon_M2_mmacuhs_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>; +def Hexagon_M2_mmacuhs_s1: + di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>; +def Hexagon_M2_mmacuhs_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>; +def Hexagon_M2_mmacuhs_s0: + di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>; + +// MTYPE / MPYH / Multiply and use upper result. +def Hexagon_M2_hmmpyh_rs1: + si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>; +def Hexagon_M2_hmmpyl_rs1: + si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>; +def Hexagon_M2_mpy_up: + si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>; +def Hexagon_M2_dpmpyss_rnd_s0: + si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>; +def Hexagon_M2_mpyu_up: + si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>; + +// MTYPE / MPYH / Multiply and use full result. +def Hexagon_M2_dpmpyuu_s0: + di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>; +def Hexagon_M2_dpmpyuu_acc_s0: + di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>; +def Hexagon_M2_dpmpyuu_nac_s0: + di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>; +def Hexagon_M2_dpmpyss_s0: + di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>; +def Hexagon_M2_dpmpyss_acc_s0: + di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>; +def Hexagon_M2_dpmpyss_nac_s0: + di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>; + + +/******************************************************************** +* MTYPE/MPYS * +*********************************************************************/ + +// MTYPE / MPYS / Scalar 16x16 multiply signed. 
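The scalar 16x16 defs that follow enumerate every combination of half-word selection (.H/.L on each source) with optional :<<1, :rnd and :sat; each combination is spelled out both in the def name and in the si_MInst_sisi_* class it instantiates. One fully expanded example:

// From the block below; si_MInst_sisi_sat_rnd_hh_s1 (declared above) gives
// "$dst = mpy($src1.H, $src2.H):<<1:rnd:sat" with a 32-bit IntRegs result,
// matching int_hexagon_M2_mpy_sat_rnd_hh_s1 on two IntRegs sources.
def Hexagon_M2_mpy_sat_rnd_hh_s1:
  si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;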
+//Rd=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]| +// [:<<0[:rnd|:sat|:rnd:sat]|:<<1[:rnd|:sat|:rnd:sat]]] +def Hexagon_M2_mpy_hh_s0: + si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>; +def Hexagon_M2_mpy_hh_s1: + si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>; +def Hexagon_M2_mpy_rnd_hh_s1: + si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>; +def Hexagon_M2_mpy_sat_rnd_hh_s1: + si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>; +def Hexagon_M2_mpy_sat_hh_s1: + si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>; +def Hexagon_M2_mpy_rnd_hh_s0: + si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>; +def Hexagon_M2_mpy_sat_rnd_hh_s0: + si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>; +def Hexagon_M2_mpy_sat_hh_s0: + si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>; + +def Hexagon_M2_mpy_hl_s0: + si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>; +def Hexagon_M2_mpy_hl_s1: + si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>; +def Hexagon_M2_mpy_rnd_hl_s1: + si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>; +def Hexagon_M2_mpy_sat_rnd_hl_s1: + si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>; +def Hexagon_M2_mpy_sat_hl_s1: + si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>; +def Hexagon_M2_mpy_rnd_hl_s0: + si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>; +def Hexagon_M2_mpy_sat_rnd_hl_s0: + si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>; +def Hexagon_M2_mpy_sat_hl_s0: + si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>; + +def Hexagon_M2_mpy_lh_s0: + si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>; +def Hexagon_M2_mpy_lh_s1: + si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>; +def Hexagon_M2_mpy_rnd_lh_s1: + si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>; +def Hexagon_M2_mpy_sat_rnd_lh_s1: + si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>; +def Hexagon_M2_mpy_sat_lh_s1: + si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>; +def Hexagon_M2_mpy_rnd_lh_s0: + si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>; +def Hexagon_M2_mpy_sat_rnd_lh_s0: + si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>; +def Hexagon_M2_mpy_sat_lh_s0: + si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>; + +def Hexagon_M2_mpy_ll_s0: + si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>; +def Hexagon_M2_mpy_ll_s1: + si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>; +def Hexagon_M2_mpy_rnd_ll_s1: + si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>; +def Hexagon_M2_mpy_sat_rnd_ll_s1: + si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>; +def Hexagon_M2_mpy_sat_ll_s1: + si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>; +def Hexagon_M2_mpy_rnd_ll_s0: + si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>; +def Hexagon_M2_mpy_sat_rnd_ll_s0: + si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>; +def Hexagon_M2_mpy_sat_ll_s0: + si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>; + +//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]] +def Hexagon_M2_mpyd_hh_s0: + di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>; +def Hexagon_M2_mpyd_hh_s1: + di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>; +def Hexagon_M2_mpyd_rnd_hh_s1: + di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>; +def Hexagon_M2_mpyd_rnd_hh_s0: + di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>; + 
+def Hexagon_M2_mpyd_hl_s0: + di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>; +def Hexagon_M2_mpyd_hl_s1: + di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>; +def Hexagon_M2_mpyd_rnd_hl_s1: + di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>; +def Hexagon_M2_mpyd_rnd_hl_s0: + di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>; + +def Hexagon_M2_mpyd_lh_s0: + di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>; +def Hexagon_M2_mpyd_lh_s1: + di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>; +def Hexagon_M2_mpyd_rnd_lh_s1: + di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>; +def Hexagon_M2_mpyd_rnd_lh_s0: + di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>; + +def Hexagon_M2_mpyd_ll_s0: + di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>; +def Hexagon_M2_mpyd_ll_s1: + di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>; +def Hexagon_M2_mpyd_rnd_ll_s1: + di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>; +def Hexagon_M2_mpyd_rnd_ll_s0: + di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>; + +//Rx+=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] +def Hexagon_M2_mpy_acc_hh_s0: + si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>; +def Hexagon_M2_mpy_acc_hh_s1: + si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>; +def Hexagon_M2_mpy_acc_sat_hh_s1: + si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>; +def Hexagon_M2_mpy_acc_sat_hh_s0: + si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>; + +def Hexagon_M2_mpy_acc_hl_s0: + si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>; +def Hexagon_M2_mpy_acc_hl_s1: + si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>; +def Hexagon_M2_mpy_acc_sat_hl_s1: + si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>; +def Hexagon_M2_mpy_acc_sat_hl_s0: + si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>; + +def Hexagon_M2_mpy_acc_lh_s0: + si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>; +def Hexagon_M2_mpy_acc_lh_s1: + si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>; +def Hexagon_M2_mpy_acc_sat_lh_s1: + si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>; +def Hexagon_M2_mpy_acc_sat_lh_s0: + si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>; + +def Hexagon_M2_mpy_acc_ll_s0: + si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>; +def Hexagon_M2_mpy_acc_ll_s1: + si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>; +def Hexagon_M2_mpy_acc_sat_ll_s1: + si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>; +def Hexagon_M2_mpy_acc_sat_ll_s0: + si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>; + +//Rx-=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] +def Hexagon_M2_mpy_nac_hh_s0: + si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>; +def Hexagon_M2_mpy_nac_hh_s1: + si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>; +def Hexagon_M2_mpy_nac_sat_hh_s1: + si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>; +def Hexagon_M2_mpy_nac_sat_hh_s0: + si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>; + +def Hexagon_M2_mpy_nac_hl_s0: + si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>; +def Hexagon_M2_mpy_nac_hl_s1: + si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>; +def Hexagon_M2_mpy_nac_sat_hl_s1: + si_MInst_sisisi_nac_sat_hl_s1 <"mpy", 
int_hexagon_M2_mpy_nac_sat_hl_s1>; +def Hexagon_M2_mpy_nac_sat_hl_s0: + si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>; + +def Hexagon_M2_mpy_nac_lh_s0: + si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>; +def Hexagon_M2_mpy_nac_lh_s1: + si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>; +def Hexagon_M2_mpy_nac_sat_lh_s1: + si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>; +def Hexagon_M2_mpy_nac_sat_lh_s0: + si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>; + +def Hexagon_M2_mpy_nac_ll_s0: + si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>; +def Hexagon_M2_mpy_nac_ll_s1: + si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>; +def Hexagon_M2_mpy_nac_sat_ll_s1: + si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>; +def Hexagon_M2_mpy_nac_sat_ll_s0: + si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>; + +//Rx+=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1] +def Hexagon_M2_mpyd_acc_hh_s0: + di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>; +def Hexagon_M2_mpyd_acc_hh_s1: + di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>; + +def Hexagon_M2_mpyd_acc_hl_s0: + di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>; +def Hexagon_M2_mpyd_acc_hl_s1: + di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>; + +def Hexagon_M2_mpyd_acc_lh_s0: + di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>; +def Hexagon_M2_mpyd_acc_lh_s1: + di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>; + +def Hexagon_M2_mpyd_acc_ll_s0: + di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>; +def Hexagon_M2_mpyd_acc_ll_s1: + di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>; + +//Rx-=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1] +def Hexagon_M2_mpyd_nac_hh_s0: + di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>; +def Hexagon_M2_mpyd_nac_hh_s1: + di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>; + +def Hexagon_M2_mpyd_nac_hl_s0: + di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>; +def Hexagon_M2_mpyd_nac_hl_s1: + di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>; + +def Hexagon_M2_mpyd_nac_lh_s0: + di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>; +def Hexagon_M2_mpyd_nac_lh_s1: + di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>; + +def Hexagon_M2_mpyd_nac_ll_s0: + di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>; +def Hexagon_M2_mpyd_nac_ll_s1: + di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>; + +// MTYPE / MPYS / Scalar 16x16 multiply unsigned. 
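The unsigned mpyu forms that follow differ from the signed ones only in zero-extending the selected halfwords. For instance, an illustrative sketch of the ll variant:

#include <cstdint>

// Sketch of Rd = mpyu(Rs.l, Rt.l): zero-extended low halfwords, 32-bit product.
uint32_t mpyu_ll_s0(uint32_t rs, uint32_t rt) {
  uint32_t a = rs & 0xFFFFu;   // Rs.l, zero-extended
  uint32_t b = rt & 0xFFFFu;   // Rt.l, zero-extended
  return a * b;                // 0xFFFF * 0xFFFF still fits in 32 bits
}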
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def Hexagon_M2_mpyu_hh_s0: + si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>; +def Hexagon_M2_mpyu_hh_s1: + si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>; +def Hexagon_M2_mpyu_hl_s0: + si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>; +def Hexagon_M2_mpyu_hl_s1: + si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>; +def Hexagon_M2_mpyu_lh_s0: + si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>; +def Hexagon_M2_mpyu_lh_s1: + si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>; +def Hexagon_M2_mpyu_ll_s0: + si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>; +def Hexagon_M2_mpyu_ll_s1: + si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>; + +//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def Hexagon_M2_mpyud_hh_s0: + di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>; +def Hexagon_M2_mpyud_hh_s1: + di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>; +def Hexagon_M2_mpyud_hl_s0: + di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>; +def Hexagon_M2_mpyud_hl_s1: + di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>; +def Hexagon_M2_mpyud_lh_s0: + di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>; +def Hexagon_M2_mpyud_lh_s1: + di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>; +def Hexagon_M2_mpyud_ll_s0: + di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>; +def Hexagon_M2_mpyud_ll_s1: + di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>; + +//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def Hexagon_M2_mpyu_acc_hh_s0: + si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>; +def Hexagon_M2_mpyu_acc_hh_s1: + si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>; +def Hexagon_M2_mpyu_acc_hl_s0: + si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>; +def Hexagon_M2_mpyu_acc_hl_s1: + si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>; +def Hexagon_M2_mpyu_acc_lh_s0: + si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>; +def Hexagon_M2_mpyu_acc_lh_s1: + si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>; +def Hexagon_M2_mpyu_acc_ll_s0: + si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>; +def Hexagon_M2_mpyu_acc_ll_s1: + si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>; + +//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def Hexagon_M2_mpyu_nac_hh_s0: + si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>; +def Hexagon_M2_mpyu_nac_hh_s1: + si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>; +def Hexagon_M2_mpyu_nac_hl_s0: + si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>; +def Hexagon_M2_mpyu_nac_hl_s1: + si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>; +def Hexagon_M2_mpyu_nac_lh_s0: + si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>; +def Hexagon_M2_mpyu_nac_lh_s1: + si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>; +def Hexagon_M2_mpyu_nac_ll_s0: + si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>; +def Hexagon_M2_mpyu_nac_ll_s1: + si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>; + +//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def Hexagon_M2_mpyud_acc_hh_s0: + di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>; +def Hexagon_M2_mpyud_acc_hh_s1: + di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>; +def Hexagon_M2_mpyud_acc_hl_s0: + di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>; +def Hexagon_M2_mpyud_acc_hl_s1: + 
di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>; +def Hexagon_M2_mpyud_acc_lh_s0: + di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>; +def Hexagon_M2_mpyud_acc_lh_s1: + di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>; +def Hexagon_M2_mpyud_acc_ll_s0: + di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>; +def Hexagon_M2_mpyud_acc_ll_s1: + di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>; + +//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def Hexagon_M2_mpyud_nac_hh_s0: + di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>; +def Hexagon_M2_mpyud_nac_hh_s1: + di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>; +def Hexagon_M2_mpyud_nac_hl_s0: + di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>; +def Hexagon_M2_mpyud_nac_hl_s1: + di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>; +def Hexagon_M2_mpyud_nac_lh_s0: + di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>; +def Hexagon_M2_mpyud_nac_lh_s1: + di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>; +def Hexagon_M2_mpyud_nac_ll_s0: + di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>; +def Hexagon_M2_mpyud_nac_ll_s1: + di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>; + + +/******************************************************************** +* MTYPE/VB * +*********************************************************************/ + +// MTYPE / VB / Vector reduce add unsigned bytes. +def Hexagon_A2_vraddub: + di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>; +def Hexagon_A2_vraddub_acc: + di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>; + +// MTYPE / VB / Vector sum of absolute differences unsigned bytes. +def Hexagon_A2_vrsadub: + di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>; +def Hexagon_A2_vrsadub_acc: + di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>; + +/******************************************************************** +* MTYPE/VH * +*********************************************************************/ + +// MTYPE / VH / Vector dual multiply. +def Hexagon_M2_vdmpys_s1: + di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>; +def Hexagon_M2_vdmpys_s0: + di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>; +def Hexagon_M2_vdmacs_s1: + di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>; +def Hexagon_M2_vdmacs_s0: + di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>; + +// MTYPE / VH / Vector dual multiply with round and pack. +def Hexagon_M2_vdmpyrs_s0: + si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>; +def Hexagon_M2_vdmpyrs_s1: + si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>; + +// MTYPE / VH / Vector multiply even halfwords. +def Hexagon_M2_vmpy2es_s1: + di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>; +def Hexagon_M2_vmpy2es_s0: + di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>; +def Hexagon_M2_vmac2es: + di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>; +def Hexagon_M2_vmac2es_s1: + di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>; +def Hexagon_M2_vmac2es_s0: + di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>; + +// MTYPE / VH / Vector multiply halfwords. 
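In the vector halfword multiplies that follow, each 32-bit source is treated as two halfword lanes and the two products fill the two words of a 64-bit pair. A hedged sketch, assuming the usual low-lane-to-low-word packing (the :sat marking is omitted here because an unshifted 16x16 product cannot exceed 32 bits):

#include <cstdint>

// Sketch of Rdd = vmpyh(Rs, Rt), s0 form.
uint64_t vmpyh_s0(uint32_t rs, uint32_t rt) {
  int32_t lo = (int32_t)(int16_t)(rs & 0xFFFF) * (int16_t)(rt & 0xFFFF);
  int32_t hi = (int32_t)(int16_t)(rs >> 16)    * (int16_t)(rt >> 16);
  return ((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo;
}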
+def Hexagon_M2_vmpy2s_s0: + di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>; +def Hexagon_M2_vmpy2s_s1: + di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>; +def Hexagon_M2_vmac2: + di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>; +def Hexagon_M2_vmac2s_s0: + di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>; +def Hexagon_M2_vmac2s_s1: + di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>; + +// MTYPE / VH / Vector multiply halfwords with round and pack. +def Hexagon_M2_vmpy2s_s0pack: + si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>; +def Hexagon_M2_vmpy2s_s1pack: + si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>; + +// MTYPE / VH / Vector reduce multiply halfwords. +// Rxx32+=vrmpyh(Rss32,Rtt32) +def Hexagon_M2_vrmpy_s0: + di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>; +def Hexagon_M2_vrmac_s0: + di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>; + + +/******************************************************************** +* STYPE/ALU * +*********************************************************************/ + +// STYPE / ALU / Absolute value. +def Hexagon_A2_abs: + si_SInst_si <"abs", int_hexagon_A2_abs>; +def Hexagon_A2_absp: + di_SInst_di <"abs", int_hexagon_A2_absp>; +def Hexagon_A2_abssat: + si_SInst_si_sat <"abs", int_hexagon_A2_abssat>; + +// STYPE / ALU / Negate. +def Hexagon_A2_negp: + di_SInst_di <"neg", int_hexagon_A2_negp>; +def Hexagon_A2_negsat: + si_SInst_si_sat <"neg", int_hexagon_A2_negsat>; + +// STYPE / ALU / Logical Not. +def Hexagon_A2_notp: + di_SInst_di <"not", int_hexagon_A2_notp>; + +// STYPE / ALU / Sign extend word to doubleword. +def Hexagon_A2_sxtw: + di_SInst_si <"sxtw", int_hexagon_A2_sxtw>; + + +/******************************************************************** +* STYPE/BIT * +*********************************************************************/ + +// STYPE / BIT / Count leading. +def Hexagon_S2_cl0: + si_SInst_si <"cl0", int_hexagon_S2_cl0>; +def Hexagon_S2_cl0p: + si_SInst_di <"cl0", int_hexagon_S2_cl0p>; +def Hexagon_S2_cl1: + si_SInst_si <"cl1", int_hexagon_S2_cl1>; +def Hexagon_S2_cl1p: + si_SInst_di <"cl1", int_hexagon_S2_cl1p>; +def Hexagon_S2_clb: + si_SInst_si <"clb", int_hexagon_S2_clb>; +def Hexagon_S2_clbp: + si_SInst_di <"clb", int_hexagon_S2_clbp>; +def Hexagon_S2_clbnorm: + si_SInst_si <"normamt", int_hexagon_S2_clbnorm>; + +// STYPE / BIT / Count trailing. +def Hexagon_S2_ct0: + si_SInst_si <"ct0", int_hexagon_S2_ct0>; +def Hexagon_S2_ct1: + si_SInst_si <"ct1", int_hexagon_S2_ct1>; + +// STYPE / BIT / Compare bit mask. +def HEXAGON_C2_bitsclr: + qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>; +def HEXAGON_C2_bitsclri: + qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>; +def HEXAGON_C2_bitsset: + qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>; + +// STYPE / BIT / Extract unsigned. +// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm]) +def Hexagon_S2_extractu: + si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>; +def Hexagon_S2_extractu_rp: + si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>; +def Hexagon_S2_extractup: + di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>; +def Hexagon_S2_extractup_rp: + di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>; + +// STYPE / BIT / Insert bitfield. 
+def HEXAGON_S2_insert: + si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>; +def HEXAGON_S2_insert_rp: + si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>; +def HEXAGON_S2_insertp: + di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>; +def HEXAGON_S2_insertp_rp: + di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>; + +// STYPE / BIT / Innterleave/deinterleave. +def HEXAGON_S2_interleave: + di_SInst_di <"interleave", int_hexagon_S2_interleave>; +def HEXAGON_S2_deinterleave: + di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>; + +// STYPE / BIT / Linear feedback-shift Iteration. +def HEXAGON_S2_lfsp: + di_SInst_didi <"lfs", int_hexagon_S2_lfsp>; + +// STYPE / BIT / Bit reverse. +def HEXAGON_S2_brev: + si_SInst_si <"brev", int_hexagon_S2_brev>; + +// STYPE / BIT / Set/Clear/Toggle Bit. +def Hexagon_S2_setbit_i: + si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>; +def Hexagon_S2_togglebit_i: + si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>; +def Hexagon_S2_clrbit_i: + si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>; +def Hexagon_S2_setbit_r: + si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>; +def Hexagon_S2_togglebit_r: + si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>; +def Hexagon_S2_clrbit_r: + si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>; + +// STYPE / BIT / Test Bit. +def Hexagon_S2_tstbit_i: + qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>; +def Hexagon_S2_tstbit_r: + qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>; + + +/******************************************************************** +* STYPE/COMPLEX * +*********************************************************************/ + +// STYPE / COMPLEX / Vector Complex conjugate. +def Hexagon_A2_vconj: + di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>; + +// STYPE / COMPLEX / Vector Complex rotate. +def Hexagon_S2_vcrotate: + di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>; + + +/******************************************************************** +* STYPE/PERM * +*********************************************************************/ + +// STYPE / PERM / Saturate. +def Hexagon_A2_sat: + si_SInst_di <"sat", int_hexagon_A2_sat>; +def Hexagon_A2_satb: + si_SInst_si <"satb", int_hexagon_A2_satb>; +def Hexagon_A2_sath: + si_SInst_si <"sath", int_hexagon_A2_sath>; +def Hexagon_A2_satub: + si_SInst_si <"satub", int_hexagon_A2_satub>; +def Hexagon_A2_satuh: + si_SInst_si <"satuh", int_hexagon_A2_satuh>; + +// STYPE / PERM / Swizzle bytes. +def Hexagon_A2_swiz: + si_SInst_si <"swiz", int_hexagon_A2_swiz>; + +// STYPE / PERM / Vector align. +// Need custom lowering +def Hexagon_S2_valignib: + di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>; +def Hexagon_S2_valignrb: + di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>; + +// STYPE / PERM / Vector round and pack. +def Hexagon_S2_vrndpackwh: + si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>; +def Hexagon_S2_vrndpackwhs: + si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>; + +// STYPE / PERM / Vector saturate and pack. +def Hexagon_S2_svsathb: + si_SInst_si <"vsathb", int_hexagon_S2_svsathb>; +def Hexagon_S2_vsathb: + si_SInst_di <"vsathb", int_hexagon_S2_vsathb>; +def Hexagon_S2_svsathub: + si_SInst_si <"vsathub", int_hexagon_S2_svsathub>; +def Hexagon_S2_vsathub: + si_SInst_di <"vsathub", int_hexagon_S2_vsathub>; +def Hexagon_S2_vsatwh: + si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>; +def Hexagon_S2_vsatwuh: + si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>; + +// STYPE / PERM / Vector saturate without pack. 
+def Hexagon_S2_vsathb_nopack: + di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>; +def Hexagon_S2_vsathub_nopack: + di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>; +def Hexagon_S2_vsatwh_nopack: + di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>; +def Hexagon_S2_vsatwuh_nopack: + di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>; + +// STYPE / PERM / Vector shuffle. +def Hexagon_S2_shuffeb: + di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>; +def Hexagon_S2_shuffeh: + di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>; +def Hexagon_S2_shuffob: + di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>; +def Hexagon_S2_shuffoh: + di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>; + +// STYPE / PERM / Vector splat bytes. +def Hexagon_S2_vsplatrb: + si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>; + +// STYPE / PERM / Vector splat halfwords. +def Hexagon_S2_vsplatrh: + di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>; + +// STYPE / PERM / Vector splice. +def HEXAGON_S2_vsplicerb: + di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>; +def HEXAGON_S2_vspliceib: + di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>; + +// STYPE / PERM / Sign extend. +def Hexagon_S2_vsxtbh: + di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>; +def Hexagon_S2_vsxthw: + di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>; + +// STYPE / PERM / Truncate. +def Hexagon_S2_vtrunehb: + si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>; +def Hexagon_S2_vtrunohb: + si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>; +def Hexagon_S2_vtrunewh: + di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>; +def Hexagon_S2_vtrunowh: + di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>; + +// STYPE / PERM / Zero extend. +def Hexagon_S2_vzxtbh: + di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>; +def Hexagon_S2_vzxthw: + di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>; + + +/******************************************************************** +* STYPE/PRED * +*********************************************************************/ + +// STYPE / PRED / Mask generate from predicate. +def Hexagon_C2_mask: + di_SInst_qi <"mask", int_hexagon_C2_mask>; + +// STYPE / PRED / Predicate transfer. +def Hexagon_C2_tfrpr: + si_SInst_qi <"", int_hexagon_C2_tfrpr>; +def Hexagon_C2_tfrrp: + qi_SInst_si <"", int_hexagon_C2_tfrrp>; + +// STYPE / PRED / Viterbi pack even and odd predicate bits. +def Hexagon_C2_vitpack: + si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>; + + +/******************************************************************** +* STYPE/SHIFT * +*********************************************************************/ + +// STYPE / SHIFT / Shift by immediate. +def Hexagon_S2_asl_i_r: + si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>; +def Hexagon_S2_asr_i_r: + si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>; +def Hexagon_S2_lsr_i_r: + si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>; +def Hexagon_S2_asl_i_p: + di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>; +def Hexagon_S2_asr_i_p: + di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>; +def Hexagon_S2_lsr_i_p: + di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>; + +// STYPE / SHIFT / Shift by immediate and accumulate. 
+def Hexagon_S2_asl_i_r_acc: + si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>; +def Hexagon_S2_asr_i_r_acc: + si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>; +def Hexagon_S2_lsr_i_r_acc: + si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>; +def Hexagon_S2_asl_i_r_nac: + si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>; +def Hexagon_S2_asr_i_r_nac: + si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>; +def Hexagon_S2_lsr_i_r_nac: + si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>; +def Hexagon_S2_asl_i_p_acc: + di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>; +def Hexagon_S2_asr_i_p_acc: + di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>; +def Hexagon_S2_lsr_i_p_acc: + di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>; +def Hexagon_S2_asl_i_p_nac: + di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>; +def Hexagon_S2_asr_i_p_nac: + di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>; +def Hexagon_S2_lsr_i_p_nac: + di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>; + +// STYPE / SHIFT / Shift by immediate and add. +def Hexagon_S2_addasl_rrri: + si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>; + +// STYPE / SHIFT / Shift by immediate and logical. +def Hexagon_S2_asl_i_r_and: + si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>; +def Hexagon_S2_asr_i_r_and: + si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>; +def Hexagon_S2_lsr_i_r_and: + si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>; + +def Hexagon_S2_asl_i_r_xacc: + si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>; +def Hexagon_S2_lsr_i_r_xacc: + si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>; + +def Hexagon_S2_asl_i_r_or: + si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>; +def Hexagon_S2_asr_i_r_or: + si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>; +def Hexagon_S2_lsr_i_r_or: + si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>; + +def Hexagon_S2_asl_i_p_and: + di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>; +def Hexagon_S2_asr_i_p_and: + di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>; +def Hexagon_S2_lsr_i_p_and: + di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>; + +def Hexagon_S2_asl_i_p_xacc: + di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>; +def Hexagon_S2_lsr_i_p_xacc: + di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>; + +def Hexagon_S2_asl_i_p_or: + di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>; +def Hexagon_S2_asr_i_p_or: + di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>; +def Hexagon_S2_lsr_i_p_or: + di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>; + +// STYPE / SHIFT / Shift right by immediate with rounding. +def Hexagon_S2_asr_i_r_rnd: + si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>; +def Hexagon_S2_asr_i_r_rnd_goodsyntax: + si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>; + +// STYPE / SHIFT / Shift left by immediate with saturation. +def Hexagon_S2_asl_i_r_sat: + si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>; + +// STYPE / SHIFT / Shift by register. 
+def Hexagon_S2_asl_r_r: + si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>; +def Hexagon_S2_asr_r_r: + si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>; +def Hexagon_S2_lsl_r_r: + si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>; +def Hexagon_S2_lsr_r_r: + si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>; +def Hexagon_S2_asl_r_p: + di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>; +def Hexagon_S2_asr_r_p: + di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>; +def Hexagon_S2_lsl_r_p: + di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>; +def Hexagon_S2_lsr_r_p: + di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>; + +// STYPE / SHIFT / Shift by register and accumulate. +def Hexagon_S2_asl_r_r_acc: + si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>; +def Hexagon_S2_asr_r_r_acc: + si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>; +def Hexagon_S2_lsl_r_r_acc: + si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>; +def Hexagon_S2_lsr_r_r_acc: + si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>; +def Hexagon_S2_asl_r_p_acc: + di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>; +def Hexagon_S2_asr_r_p_acc: + di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>; +def Hexagon_S2_lsl_r_p_acc: + di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>; +def Hexagon_S2_lsr_r_p_acc: + di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>; + +def Hexagon_S2_asl_r_r_nac: + si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>; +def Hexagon_S2_asr_r_r_nac: + si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>; +def Hexagon_S2_lsl_r_r_nac: + si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>; +def Hexagon_S2_lsr_r_r_nac: + si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>; +def Hexagon_S2_asl_r_p_nac: + di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>; +def Hexagon_S2_asr_r_p_nac: + di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>; +def Hexagon_S2_lsl_r_p_nac: + di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>; +def Hexagon_S2_lsr_r_p_nac: + di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>; + +// STYPE / SHIFT / Shift by register and logical. 
+def Hexagon_S2_asl_r_r_and: + si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>; +def Hexagon_S2_asr_r_r_and: + si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>; +def Hexagon_S2_lsl_r_r_and: + si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>; +def Hexagon_S2_lsr_r_r_and: + si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>; + +def Hexagon_S2_asl_r_r_or: + si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>; +def Hexagon_S2_asr_r_r_or: + si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>; +def Hexagon_S2_lsl_r_r_or: + si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>; +def Hexagon_S2_lsr_r_r_or: + si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>; + +def Hexagon_S2_asl_r_p_and: + di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>; +def Hexagon_S2_asr_r_p_and: + di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>; +def Hexagon_S2_lsl_r_p_and: + di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>; +def Hexagon_S2_lsr_r_p_and: + di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>; + +def Hexagon_S2_asl_r_p_or: + di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>; +def Hexagon_S2_asr_r_p_or: + di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>; +def Hexagon_S2_lsl_r_p_or: + di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>; +def Hexagon_S2_lsr_r_p_or: + di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>; + +// STYPE / SHIFT / Shift by register with saturation. +def Hexagon_S2_asl_r_r_sat: + si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>; +def Hexagon_S2_asr_r_r_sat: + si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>; + +// STYPE / SHIFT / Table Index. +def HEXAGON_S2_tableidxb_goodsyntax: + si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>; +def HEXAGON_S2_tableidxd_goodsyntax: + si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>; +def HEXAGON_S2_tableidxh_goodsyntax: + si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>; +def HEXAGON_S2_tableidxw_goodsyntax: + si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>; + + +/******************************************************************** +* STYPE/VH * +*********************************************************************/ + +// STYPE / VH / Vector absolute value halfwords. +// Rdd64=vabsh(Rss64) +def Hexagon_A2_vabsh: + di_SInst_di <"vabsh", int_hexagon_A2_vabsh>; +def Hexagon_A2_vabshsat: + di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>; + +// STYPE / VH / Vector shift halfwords by immediate. +// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32) +def Hexagon_S2_asl_i_vh: + di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>; +def Hexagon_S2_asr_i_vh: + di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>; +def Hexagon_S2_lsr_i_vh: + di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>; + +// STYPE / VH / Vector shift halfwords by register. +// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32) +def Hexagon_S2_asl_r_vh: + di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>; +def Hexagon_S2_asr_r_vh: + di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>; +def Hexagon_S2_lsl_r_vh: + di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>; +def Hexagon_S2_lsr_r_vh: + di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>; + + +/******************************************************************** +* STYPE/VW * +*********************************************************************/ + +// STYPE / VW / Vector absolute value words. 
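vabsw, defined next, takes the absolute value of each 32-bit word of a register pair, and the :sat form clamps the INT32_MIN corner case instead of wrapping. A small sketch:

#include <cstdint>

// Sketch of Rdd = vabsw(Rss):sat on the two 32-bit lanes.
uint64_t vabsw_sat(uint64_t rss) {
  uint64_t out = 0;
  for (int lane = 0; lane < 2; ++lane) {
    int32_t w = (int32_t)(rss >> (lane * 32));
    int64_t a = (w < 0) ? -(int64_t)w : (int64_t)w;  // widen to avoid INT32_MIN overflow
    if (a > INT32_MAX) a = INT32_MAX;                // :sat clamps 0x80000000 to 0x7fffffff
    out |= (uint64_t)(uint32_t)(int32_t)a << (lane * 32);
  }
  return out;
}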
+def Hexagon_A2_vabsw: + di_SInst_di <"vabsw", int_hexagon_A2_vabsw>; +def Hexagon_A2_vabswsat: + di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>; + +// STYPE / VW / Vector shift words by immediate. +// Rdd64=v[asl/vsl]w(Rss64,Rt32) +def Hexagon_S2_asl_i_vw: + di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>; +def Hexagon_S2_asr_i_vw: + di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>; +def Hexagon_S2_lsr_i_vw: + di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>; + +// STYPE / VW / Vector shift words by register. +// Rdd64=v[asl/vsl]w(Rss64,Rt32) +def Hexagon_S2_asl_r_vw: + di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>; +def Hexagon_S2_asr_r_vw: + di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>; +def Hexagon_S2_lsl_r_vw: + di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>; +def Hexagon_S2_lsr_r_vw: + di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>; + +// STYPE / VW / Vector shift words with truncate and pack. +def Hexagon_S2_asr_r_svw_trun: + si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>; +def Hexagon_S2_asr_i_svw_trun: + si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>; + +include "HexagonIntrinsicsV3.td" +include "HexagonIntrinsicsV4.td" diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td new file mode 100644 index 0000000..68eaf68 --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td @@ -0,0 +1,29 @@ +//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Multiply 64-bit and use lower result +// +// Optimized with intrinisics accumulates +// +def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), + (COMBINE_rr + (Hexagon_M2_maci + (Hexagon_M2_maci (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)), + subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), + (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)), + subreg_loreg))>; + + + diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td new file mode 100644 index 0000000..2a54e62 --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td @@ -0,0 +1,50 @@ +//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format. +// +//===----------------------------------------------------------------------===// + + + + +// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary. 
+def Hexagon_M2_vrcmpys_s1: + di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>; +def Hexagon_M2_vrcmpys_acc_s1: + di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>; +def Hexagon_M2_vrcmpys_s1rp: + si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>; + + + + +/******************************************************************** +* MTYPE/VB * +*********************************************************************/ + +// MTYPE / VB / Vector reduce add unsigned bytes. +def Hexagon_M2_vradduh: + si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>; + + +/******************************************************************** +* ALU64/ALU * +*********************************************************************/ + +// ALU64 / ALU / Add. +def Hexagon_A2_addsp: + di_ALU64_sidi <"add", int_hexagon_A2_addsp>; +def Hexagon_A2_addpsat: + di_ALU64_didi <"add", int_hexagon_A2_addpsat>; + +def Hexagon_A2_maxp: + di_ALU64_didi <"max", int_hexagon_A2_maxp>; +def Hexagon_A2_maxup: + di_ALU64_didi <"maxu", int_hexagon_A2_maxup>; diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td new file mode 100644 index 0000000..dd28ebb --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td @@ -0,0 +1,369 @@ +//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V4 Architecture Extensions +// Application-Level Specification +// 80-V9418-12 Rev. A +// June 15, 2010 + + +// +// ALU 32 types. 
+// + +class si_ALU32_sisi_not<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_ALU32_s8si<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), + [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; + +class di_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_neg_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_neg_ALU32_sis10<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_neg_ALU32_siu9<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_neg_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_neg_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + + +// +// SInst Classes. 
+// +class qi_neg_SInst_qiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, and($src2, !$src3)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, and($src2, $src3)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, or($src2, !$src3)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, or($src2, $src3)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_si_addsis6<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, add($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_SInst_si_subs6si<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, sub(#$src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, + IntRegs:$src3))]>; + +class di_ALU64_didi_neg<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class di_MInst_dididi_xacc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_and<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_MInst_sisisi_andn<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_sisis10_andi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, and($src2, #$src3))")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_MInst_sisisi_xor<string opc, Intrinsic IntID> + : 
MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_MInst_sisisi_xorn<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_sisis10_or<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3), + !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + imm:$src3))]>; + +class si_MInst_sisisi_or<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_MInst_sisisi_orn<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_siu5_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + + +/******************************************************************** +* ALU32/ALU * +*********************************************************************/ + +// ALU32 / ALU / Logical Operations. +def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>; +def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>; + + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ + +// ALU32 / PERM / Combine Words Into Doublewords. +def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>; +def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>; + + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ + +// ALU32 / PRED / Conditional Shift Halfword. +// ALU32 / PRED / Conditional Sign Extend. +// ALU32 / PRED / Conditional Zero Extend. +// ALU32 / PRED / Compare. +def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>; +def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>; +def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>; +def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>; +def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>; +def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>; + +// ALU32 / PRED / cmpare To General Register. 
+def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>; +def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>; +def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>; +def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>; + + +/******************************************************************** +* CR * +*********************************************************************/ + +// CR / Corner Detection Acceleration. +def Hexagon_C4_fastcorner9: + qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>; +def Hexagon_C4_fastcorner9_not: + qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>; + +// CR / Logical Operations On Predicates. +def Hexagon_C4_and_andn: + qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>; +def Hexagon_C4_and_and: + qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>; +def Hexagon_C4_and_orn: + qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>; +def Hexagon_C4_and_or: + qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>; +def Hexagon_C4_or_andn: + qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>; +def Hexagon_C4_or_and: + qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>; +def Hexagon_C4_or_orn: + qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>; +def Hexagon_C4_or_or: + qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>; + + +/******************************************************************** +* XTYPE/ALU * +*********************************************************************/ + +// XTYPE / ALU / Add And Accumulate. +def Hexagon_S4_addaddi: + si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>; +def Hexagon_S4_subaddi: + si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>; + +// XTYPE / ALU / Logical Doublewords. +def Hexagon_S4_andnp: + di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>; +def Hexagon_S4_ornp: + di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>; + +// XTYPE / ALU / Logical-logical Doublewords. +def Hexagon_M4_xor_xacc: + di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>; + +// XTYPE / ALU / Logical-logical Words. +def HEXAGON_M4_and_and: + si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>; +def HEXAGON_M4_and_or: + si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>; +def HEXAGON_M4_and_xor: + si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>; +def HEXAGON_M4_and_andn: + si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>; +def HEXAGON_M4_xor_and: + si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>; +def HEXAGON_M4_xor_or: + si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>; +def HEXAGON_M4_xor_andn: + si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>; +def HEXAGON_M4_or_and: + si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>; +def HEXAGON_M4_or_or: + si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>; +def HEXAGON_M4_or_xor: + si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>; +def HEXAGON_M4_or_andn: + si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>; +def HEXAGON_S4_or_andix: + si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>; +def HEXAGON_S4_or_andi: + si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>; +def HEXAGON_S4_or_ori: + si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>; + +// XTYPE / ALU / Modulo wrap. +def HEXAGON_A4_modwrapu: + si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>; + +// XTYPE / ALU / Round. 
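The round forms that follow discard the low #u5 bits after adding half of the discarded weight; cround uses convergent rounding instead, which this sketch does not attempt to model:

#include <cstdint>

// Sketch of Rd = round(Rs, #u5): round to nearest, then arithmetic shift right.
int32_t round_ri(int32_t rs, unsigned u5) {
  if (u5 == 0) return rs;                        // nothing to round away
  int64_t biased = (int64_t)rs + (1ll << (u5 - 1));
  return (int32_t)(biased >> u5);                // the :sat variant would clamp here
}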
+def HEXAGON_A4_cround_ri: + si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>; +def HEXAGON_A4_cround_rr: + si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>; +def HEXAGON_A4_round_ri: + si_SInst_siu5 <"round", int_hexagon_A4_round_ri>; +def HEXAGON_A4_round_rr: + si_SInst_sisi <"round", int_hexagon_A4_round_rr>; +def HEXAGON_A4_round_ri_sat: + si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>; +def HEXAGON_A4_round_rr_sat: + si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>; + +// XTYPE / ALU / Vector reduce add unsigned halfwords. +// XTYPE / ALU / Vector add bytes. +// XTYPE / ALU / Vector conditional negate. +// XTYPE / ALU / Vector maximum bytes. +// XTYPE / ALU / Vector reduce maximum halfwords. +// XTYPE / ALU / Vector reduce maximum words. +// XTYPE / ALU / Vector minimum bytes. +// XTYPE / ALU / Vector reduce minimum halfwords. +// XTYPE / ALU / Vector reduce minimum words. +// XTYPE / ALU / Vector subtract bytes. + + +/******************************************************************** +* XTYPE/BIT * +*********************************************************************/ + +// XTYPE / BIT / Count leading. +// XTYPE / BIT / Count trailing. +// XTYPE / BIT / Extract bitfield. +// XTYPE / BIT / Masked parity. +// XTYPE / BIT / Bit reverse. +// XTYPE / BIT / Split bitfield. + + +/******************************************************************** +* XTYPE/COMPLEX * +*********************************************************************/ + +// XTYPE / COMPLEX / Complex add/sub halfwords. +// XTYPE / COMPLEX / Complex add/sub words. +// XTYPE / COMPLEX / Complex multiply 32x16. +// XTYPE / COMPLEX / Vector reduce complex rotate. + + +/******************************************************************** +* XTYPE/MPY * +*********************************************************************/ + +// XTYPE / COMPLEX / Complex add/sub halfwords. diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h new file mode 100644 index 0000000..0318c51 --- /dev/null +++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -0,0 +1,75 @@ +//=- HexagonMachineFuctionInfo.h - Hexagon machine function info --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HexagonMACHINEFUNCTIONINFO_H +#define HexagonMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + + namespace Hexagon { + const unsigned int StartPacket = 0x1; + const unsigned int EndPacket = 0x2; + } + + +/// Hexagon target-specific information for each MachineFunction. +class HexagonMachineFunctionInfo : public MachineFunctionInfo { + // SRetReturnReg - Some subtargets require that sret lowering includes + // returning the value of the returned struct in a register. This field + // holds the virtual register into which the sret argument is passed. 
+ unsigned SRetReturnReg; + std::vector<MachineInstr*> AllocaAdjustInsts; + int VarArgsFrameIndex; + bool HasClobberLR; + + std::map<const MachineInstr*, unsigned> PacketInfo; + + +public: + HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {} + + HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), + HasClobberLR(0) {} + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void addAllocaAdjustInst(MachineInstr* MI) { + AllocaAdjustInsts.push_back(MI); + } + const std::vector<MachineInstr*>& getAllocaAdjustInsts() { + return AllocaAdjustInsts; + } + + void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; } + int getVarArgsFrameIndex() { return VarArgsFrameIndex; } + + void setStartPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::StartPacket; + } + void setEndPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::EndPacket; + } + bool isStartPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::StartPacket)); + } + bool isEndPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::EndPacket)); + } + void setHasClobberLR(bool v) { HasClobberLR = v; } + bool hasClobberLR() const { return HasClobberLR; } + +}; +} // End llvm namespace + +#endif diff --git a/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp new file mode 100644 index 0000000..1229aca --- /dev/null +++ b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp @@ -0,0 +1,129 @@ +//===-- HexagonOptimizeSZExtends.cpp - Identify and remove sign and -------===// +//===-- zero extends. -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/PassSupport.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/Debug.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <algorithm> +#include "Hexagon.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +namespace { + struct HexagonOptimizeSZExtends : public MachineFunctionPass { + + public: + static char ID; + HexagonOptimizeSZExtends() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "Hexagon remove redundant zero and size extends"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + }; +} + +char HexagonOptimizeSZExtends::ID = 0; + +// This is a brain dead pass to get rid of redundant sign extends for the +// following case: +// +// Transform the following pattern +// %vreg170<def> = SXTW %vreg166 +// ... 
+// %vreg176<def> = COPY %vreg170:subreg_loreg +// +// Into +// %vreg176<def> = COPY vreg166 + +bool HexagonOptimizeSZExtends::runOnMachineFunction(MachineFunction &MF) { + DenseMap<unsigned, unsigned> SExtMap; + + // Loop over all of the basic blocks + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + SExtMap.clear(); + + // Traverse the basic block. + for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + // Look for sign extends: + // %vreg170<def> = SXTW %vreg166 + if (MI->getOpcode() == Hexagon::SXTW) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + // Just handle virtual registers. + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Map the following: + // %vreg170<def> = SXTW %vreg166 + // SExtMap[170] = vreg166 + SExtMap[DstReg] = SrcReg; + } + } + // Look for copy: + // %vreg176<def> = COPY %vreg170:subreg_loreg + if (MI->isCopy()) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + + // Make sure we are copying the lower 32 bits. + if (Src.getSubReg() != Hexagon::subreg_loreg) + continue; + + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Try to find in the map. + if (unsigned SextSrc = SExtMap.lookup(SrcReg)) { + // Change the 1st operand. + MI->RemoveOperand(1); + MI->addOperand(MachineOperand::CreateReg(SextSrc, false)); + } + } + } + } + } + return true; +} + +FunctionPass *llvm::createHexagonOptimizeSZExtends() { + return new HexagonOptimizeSZExtends(); +} diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp new file mode 100644 index 0000000..521e0c1 --- /dev/null +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -0,0 +1,323 @@ +//==- HexagonRegisterInfo.cpp - Hexagon Register Information -----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Type.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <iostream> + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Function.h" +using namespace llvm; + + +HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st, + const HexagonInstrInfo &tii) + : HexagonGenRegisterInfo(Hexagon::R31), + Subtarget(st), + TII(tii) { +} + +const unsigned* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction + *MF) + const { + static const unsigned CalleeSavedRegsV2[] = { + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + }; + static const unsigned CalleeSavedRegsV3[] = { + Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + }; + + switch(Subtarget.getHexagonArchVersion()) { + case HexagonSubtarget::V2: + return CalleeSavedRegsV2; + break; + case HexagonSubtarget::V3: + case HexagonSubtarget::V4: + return CalleeSavedRegsV3; + break; + default: + const char *ErrorString = + "Callee saved registers requested for unknown archtecture version"; + llvm_unreachable(ErrorString); + } +} + +BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) + const { + BitVector Reserved(getNumRegs()); + Reserved.set(HEXAGON_RESERVED_REG_1); + Reserved.set(HEXAGON_RESERVED_REG_2); + Reserved.set(Hexagon::R29); + Reserved.set(Hexagon::R30); + Reserved.set(Hexagon::R31); + Reserved.set(Hexagon::D14); + Reserved.set(Hexagon::D15); + Reserved.set(Hexagon::LC0); + Reserved.set(Hexagon::LC1); + Reserved.set(Hexagon::SA0); + Reserved.set(Hexagon::SA1); + return Reserved; +} + + +const TargetRegisterClass* const* +HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = { + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + }; + static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = { + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + }; + + switch(Subtarget.getHexagonArchVersion()) { + case HexagonSubtarget::V2: + return CalleeSavedRegClassesV2; + break; + case HexagonSubtarget::V3: + case HexagonSubtarget::V4: + return CalleeSavedRegClassesV3; + break; + default: + const char *ErrorString = + "Callee saved register classes requested for unknown archtecture version"; + llvm_unreachable(ErrorString); + } +} + +void HexagonRegisterInfo:: 
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + MachineInstr &MI = *I; + + if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) { + // Hexagon_TODO: add code + } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) { + // Hexagon_TODO: add code + } else { + assert(0 && "Cannot handle this call frame pseudo instruction"); + } + MBB.erase(I); +} + +void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + + // + // Hexagon_TODO: Do we need to enforce this for Hexagon? + assert(SPAdj == 0 && "Unexpected"); + + + unsigned i = 0; + MachineInstr &MI = *II; + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(i).getIndex(); + + // Addressable stack objects are accessed using neg. offsets from %fp. + MachineFunction &MF = *MI.getParent()->getParent(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + unsigned FrameReg = getFrameRegister(MF); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + if (!TFI->hasFP(MF)) { + // We will not reserve space on the stack for the lr and fp registers. + Offset -= 2 * Hexagon_WordSize; + } + + const unsigned FrameSize = MFI.getStackSize(); + + if (!MFI.hasVarSizedObjects() && + TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) && + !TII.isSpillPredRegOp(&MI)) { + // Replace frame index with a stack pointer reference. + MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true); + MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset); + } else { + // Replace frame index with a frame pointer reference. + if (!TII.isValidOffset(MI.getOpcode(), Offset)) { + + // If the offset overflows, then correct it. + // + // For loads, we do not need a reserved register + // r0 = memw(r30 + #10000) to: + // + // r0 = add(r30, #10000) + // r0 = memw(r0) + if ( (MI.getOpcode() == Hexagon::LDriw) || + (MI.getOpcode() == Hexagon::LDrid) || + (MI.getOpcode() == Hexagon::LDrih) || + (MI.getOpcode() == Hexagon::LDriuh) || + (MI.getOpcode() == Hexagon::LDrib) || + (MI.getOpcode() == Hexagon::LDriub) ) { + unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ? + *getSubRegisters(MI.getOperand(0).getReg()) : + MI.getOperand(0).getReg(); + + // Check if offset can fit in addi. + if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), + dstReg).addReg(FrameReg).addReg(dstReg); + } else { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_ri), + dstReg).addReg(FrameReg).addImm(Offset); + } + + MI.getOperand(i).ChangeToRegister(dstReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } else if ((MI.getOpcode() == Hexagon::STriw) || + (MI.getOpcode() == Hexagon::STrid) || + (MI.getOpcode() == Hexagon::STrih) || + (MI.getOpcode() == Hexagon::STrib) || + (MI.getOpcode() == Hexagon::STriwt)) { + // For stores, we need a reserved register. Change + // memw(r30 + #10000) = r0 to: + // + // rs = add(r30, #10000); + // memw(rs) = r0 + unsigned resReg = HEXAGON_RESERVED_REG_1; + + // Check if offset can fit in addi. 
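// Illustrative sketch of the two sequences the store branch below produces,
// assuming FrameReg is r30 and the reserved register HEXAGON_RESERVED_REG_1
// is r10; the mnemonics are spelled informally:
//
//   Offset fits ADD_ri:           r10 = add(r30, #Offset)
//                                 memw(r10 + #0) = r0
//
//   Offset does not fit ADD_ri:   r10 = ##Offset          (CONST32_Int_Real)
//                                 r10 = add(r30, r10)     (ADD_rr)
//                                 memw(r10 + #0) = r0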
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), + resReg).addReg(FrameReg).addReg(resReg); + } else { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_ri), + resReg).addReg(FrameReg).addImm(Offset); + } + MI.getOperand(i).ChangeToRegister(resReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } else if (TII.isMemOp(&MI)) { + unsigned resReg = HEXAGON_RESERVED_REG_1; + if (!MFI.hasVarSizedObjects() && + TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) { + MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, + true); + MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset); + } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), + resReg).addReg(FrameReg).addReg(resReg); + MI.getOperand(i).ChangeToRegister(resReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } else { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_ri), + resReg).addReg(FrameReg).addImm(Offset); + MI.getOperand(i).ChangeToRegister(resReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } + } else { + unsigned dstReg = MI.getOperand(0).getReg(); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), + dstReg).addReg(FrameReg).addReg(dstReg); + // Can we delete MI??? r2 = add (r2, #0). + MI.getOperand(i).ChangeToRegister(dstReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } + } else { + // If the offset is small enough to fit in the immediate field, directly + // encode it. + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset); + } + } + +} + +unsigned HexagonRegisterInfo::getRARegister() const { + return Hexagon::R31; +} + +unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction + &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + if (TFI->hasFP(MF)) { + return Hexagon::R30; + } + + return Hexagon::R29; +} + +unsigned HexagonRegisterInfo::getFrameRegister() const { + return Hexagon::R30; +} + +unsigned HexagonRegisterInfo::getStackRegister() const { + return Hexagon::R29; +} + +void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove> + &Moves) const +{ + // VirtualFP = (R30 + #0). 
+ unsigned FPReg = getFrameRegister(); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(FPReg, 0); + Moves.push_back(MachineMove(0, Dst, Src)); +} + +unsigned HexagonRegisterInfo::getEHExceptionRegister() const { + assert(0 && "What is the exception register"); + return 0; +} + +unsigned HexagonRegisterInfo::getEHHandlerRegister() const { + assert(0 && "What is the exception handler register"); + return 0; +} + +#define GET_REGINFO_TARGET_DESC +#include "HexagonGenRegisterInfo.inc" diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h new file mode 100644 index 0000000..33b0c14 --- /dev/null +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -0,0 +1,89 @@ +//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef HexagonREGISTERINFO_H +#define HexagonREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#define GET_REGINFO_HEADER +#include "HexagonGenRegisterInfo.inc" +#include "llvm/MC/MachineLocation.h" + +// +// We try not to hard code the reserved registers in our code, +// so the following two macros were defined. However, there +// are still a few places that R11 and R10 are hard wired. +// See below. If, in the future, we decided to change the reserved +// register. Don't forget changing the following places. +// +// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td +// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td +// 3. the definition of "IntRegs" in HexagonRegisterInfo.td +// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td +// +#define HEXAGON_RESERVED_REG_1 Hexagon::R10 +#define HEXAGON_RESERVED_REG_2 Hexagon::R11 + +namespace llvm { + +class HexagonSubtarget; +class HexagonInstrInfo; +class Type; + +struct HexagonRegisterInfo : public HexagonGenRegisterInfo { + HexagonSubtarget &Subtarget; + const HexagonInstrInfo &TII; + + HexagonRegisterInfo(HexagonSubtarget &st, const HexagonInstrInfo &tii); + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* getCalleeSavedRegClasses( + const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + /// determineFrameLayout - Determine the size of the frame and maximum call + /// frame size. + void determineFrameLayout(MachineFunction &MF) const; + + /// requiresRegisterScavenging - returns true since we may need scavenging for + /// a temporary register when generating hardware loop instructions. + bool requiresRegisterScavenging(const MachineFunction &MF) const { + return true; + } + + // Debug information queries. 
+ unsigned getRARegister() const; + unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister() const; + void getInitialFrameState(std::vector<MachineMove> &Moves) const; + unsigned getStackRegister() const; + + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td new file mode 100644 index 0000000..c05f844 --- /dev/null +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -0,0 +1,169 @@ +//===- HexagonRegisterInfo.td - Hexagon Register defs ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the Hexagon register file. +//===----------------------------------------------------------------------===// + +class HexagonReg<string n> : Register<n> { + field bits<5> Num; + let Namespace = "Hexagon"; +} + +class HexagonDoubleReg<string n, list<Register> subregs> : + RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + let Namespace = "Hexagon"; +} + +// Registers are identified with 5-bit ID numbers. +// Ri - 32-bit integer registers. +class Ri<bits<5> num, string n> : HexagonReg<n> { + let Num = num; +} + +// Rf - 32-bit floating-point registers. +class Rf<bits<5> num, string n> : HexagonReg<n> { + let Num = num; +} + + +// Rd - 64 bit registers. +class Rd<bits<5> num, string n, list<Register> subregs> : +HexagonDoubleReg<n, subregs> { + let Num = num; + let SubRegs = subregs; +} + + +class Rp<bits<5> num, string n> : HexagonReg<n> { + let Num = num; +} + +class Rc<bits<5> num, string n> : HexagonReg<n> { + let Num = num; +} + +let Namespace = "Hexagon" in { + + def subreg_loreg : SubRegIndex; + def subreg_hireg : SubRegIndex; + + // Integer registers. 
+ def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>; + def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>; + def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>; + def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>; + def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>; + def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>; + def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>; + def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>; + def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>; + def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>; + def R10 : Ri<10, "r10">, DwarfRegNum<[10]>; + def R11 : Ri<11, "r11">, DwarfRegNum<[11]>; + def R12 : Ri<12, "r12">, DwarfRegNum<[12]>; + def R13 : Ri<13, "r13">, DwarfRegNum<[13]>; + def R14 : Ri<14, "r14">, DwarfRegNum<[14]>; + def R15 : Ri<15, "r15">, DwarfRegNum<[15]>; + def R16 : Ri<16, "r16">, DwarfRegNum<[16]>; + def R17 : Ri<17, "r17">, DwarfRegNum<[17]>; + def R18 : Ri<18, "r18">, DwarfRegNum<[18]>; + def R19 : Ri<19, "r19">, DwarfRegNum<[19]>; + def R20 : Ri<20, "r20">, DwarfRegNum<[20]>; + def R21 : Ri<21, "r21">, DwarfRegNum<[21]>; + def R22 : Ri<22, "r22">, DwarfRegNum<[22]>; + def R23 : Ri<23, "r23">, DwarfRegNum<[23]>; + def R24 : Ri<24, "r24">, DwarfRegNum<[24]>; + def R25 : Ri<25, "r25">, DwarfRegNum<[25]>; + def R26 : Ri<26, "r26">, DwarfRegNum<[26]>; + def R27 : Ri<27, "r27">, DwarfRegNum<[27]>; + def R28 : Ri<28, "r28">, DwarfRegNum<[28]>; + def R29 : Ri<29, "r29">, DwarfRegNum<[29]>; + def R30 : Ri<30, "r30">, DwarfRegNum<[30]>; + def R31 : Ri<31, "r31">, DwarfRegNum<[31]>; + + + def PC : Ri<31, "r31">, DwarfRegNum<[32]>; + def GP : Ri<31, "r31">, DwarfRegNum<[33]>; + + // Aliases of the R* registers used to hold 64-bit int values (doubles). + let SubRegIndices = [subreg_loreg, subreg_hireg] in { + def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; + def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; + def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; + def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; + def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; + def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>; + def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>; + def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>; + def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>; + def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>; + def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>; + def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>; + def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>; + def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>; + def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>; + def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>; + } + + // Predicate registers. + def P0 : Rp< 0, "p0">, DwarfRegNum<[63]>; + def P1 : Rp< 0, "p1">, DwarfRegNum<[64]>; + def P2 : Rp< 0, "p2">, DwarfRegNum<[65]>; + def P3 : Rp< 0, "p3">, DwarfRegNum<[66]>; + + // Control registers. + def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>; + def LC0 : Rc<0, "lc0">, DwarfRegNum<[68]>; + + def SA1 : Rc<0, "sa1">, DwarfRegNum<[69]>; + def LC1 : Rc<0, "lc1">, DwarfRegNum<[70]>; +} + + + + + + + + + + +// Register classes. +// +// FIXME: the register order should be defined in terms of the preferred +// allocation order... 
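The Rd definitions above pair consecutive 32-bit registers into one 64-bit register, e.g. D0 is "r1:0" built from R0 and R1, with subreg_loreg and subreg_hireg naming the two halves. A small C++ sketch of that lo/hi split, purely for illustration:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t d0 = 0x0000000100000002ULL;  // plays the role of the D0 pair "r1:0"
    uint32_t r0 = (uint32_t)d0;           // subreg_loreg -> R0
    uint32_t r1 = (uint32_t)(d0 >> 32);   // subreg_hireg -> R1
    printf("r1:0 = %u:%u\n", r1, r0);     // prints "r1:0 = 1:2"
    return 0;
  }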
+// +def IntRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "R%u", 0, 9), + (sequence "R%u", 12, 28), + R10, R11, R29, R30, + R31)> { +} + + + +def DoubleRegs : RegisterClass<"Hexagon", [i64], 64, (add (sequence "D%u", 0, + 4), + (sequence "D%u", 6, 13), + D5, D14, D15)> { + let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)]; +} + + +def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))> +{ + let Size = 32; +} + +def CRRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "LC%u", 0, 1), + (sequence "SA%u", 0, 1), + PC)> { + let Size = 32; +} diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp new file mode 100644 index 0000000..3ca257f --- /dev/null +++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -0,0 +1,85 @@ +//=- HexagonRemoveExtendArgs.cpp - Remove unecessary argument sign extends --=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass that removes sign extends for function parameters. These parameters +// are already sign extended by the caller per Hexagon's ABI +// +//===----------------------------------------------------------------------===// + + + +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "HexagonTargetMachine.h" +#include <iostream> + +using namespace llvm; +namespace { + struct HexagonRemoveExtendArgs : public FunctionPass { + public: + static char ID; + HexagonRemoveExtendArgs() : FunctionPass(ID) {} + virtual bool runOnFunction(Function &F); + + const char *getPassName() const { + return "Remove sign extends"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + FunctionPass::getAnalysisUsage(AU); + } + }; +} + +char HexagonRemoveExtendArgs::ID = 0; +RegisterPass<HexagonRemoveExtendArgs> X("reargs", + "Remove Sign and Zero Extends for Args" + ); + + + +bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { + unsigned Idx = 1; + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; + ++AI, ++Idx) { + if (F.paramHasAttr(Idx, Attribute::SExt)) { + Argument* Arg = AI; + if (!isa<PointerType>(Arg->getType())) { + for (Instruction::use_iterator UI = Arg->use_begin(); + UI != Arg->use_end();) { + if (isa<SExtInst>(*UI)) { + Instruction* Use = cast<Instruction>(*UI); + SExtInst* SI = new SExtInst(Arg, Use->getType()); + assert (EVT::getEVT(SI->getType()) == + (EVT::getEVT(Use->getType()))); + ++UI; + Use->replaceAllUsesWith(SI); + Instruction* First = F.getEntryBlock().begin(); + SI->insertBefore(First); + Use->eraseFromParent(); + } else { + ++UI; + } + } + } + } + } + return true; +} + + + +FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) { + return new HexagonRemoveExtendArgs(); +} diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td new file mode 100644 index 0000000..427d1cb --- /dev/null +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -0,0 +1,53 @@ +//===-HexagonSchedule.td - Hexagon Scheduling Definitions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois 
Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Functional Units +def LUNIT : FuncUnit; +def LSUNIT : FuncUnit; +def MUNIT : FuncUnit; +def SUNIT : FuncUnit; + + +// Itinerary classes +def ALU32 : InstrItinClass; +def ALU64 : InstrItinClass; +def CR : InstrItinClass; +def J : InstrItinClass; +def JR : InstrItinClass; +def LD : InstrItinClass; +def M : InstrItinClass; +def ST : InstrItinClass; +def S : InstrItinClass; +def PSEUDO : InstrItinClass; + + +def HexagonItineraries : + ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [ + InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>, + InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>, + InstrItinData<CR , [InstrStage<1, [SUNIT]>]>, + InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>, + InstrItinData<JR , [InstrStage<1, [MUNIT]>]>, + InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>, + InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>, + InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>, + InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]> +]>; + + +//===----------------------------------------------------------------------===// +// V4 Machine Info + +//===----------------------------------------------------------------------===// + +include "HexagonScheduleV4.td" + +//===----------------------------------------------------------------------===// +// V4 Machine Info - +//===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td new file mode 100644 index 0000000..4cf66fe --- /dev/null +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -0,0 +1,56 @@ +//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// There are four SLOTS (four parallel pipelines) in Hexagon V4 machine. +// This file describes that machine information. + +// +// |===========|==================================================| +// | PIPELINE | Instruction Classes | +// |===========|==================================================| +// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | +// |-----------|--------------------------------------------------| +// | SLOT1 | LD ST ALU32 | +// |-----------|--------------------------------------------------| +// | SLOT2 | XTYPE ALU32 J JR | +// |-----------|--------------------------------------------------| +// | SLOT3 | XTYPE ALU32 J CR | +// |===========|==================================================| + + +// Functional Units. +def SLOT0 : FuncUnit; +def SLOT1 : FuncUnit; +def SLOT2 : FuncUnit; +def SLOT3 : FuncUnit; + +// Itinerary classes. +def NV_V4 : InstrItinClass; +def MEM_V4 : InstrItinClass; +// ALU64/M/S Instruction classes of V2 are collectively knownn as XTYPE in V4. 
+ +def HexagonItinerariesV4 : ProcessorItineraries< + [SLOT0, SLOT1, SLOT2, SLOT3], [], [ + InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<JR , [InstrStage<1, [SLOT2]>]>, + InstrItinData<CR , [InstrStage<1, [SLOT3]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]> +]>; + +//===----------------------------------------------------------------------===// +// Hexagon V4 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonSelectCCInfo.td b/lib/Target/Hexagon/HexagonSelectCCInfo.td new file mode 100644 index 0000000..f21d928 --- /dev/null +++ b/lib/Target/Hexagon/HexagonSelectCCInfo.td @@ -0,0 +1,121 @@ +//=-HexagoSelectCCInfo.td - Selectcc mappings ----------------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +// +// selectcc mappings. +// +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETEQ)), + (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETNE)), + (i32 (MUX_rr (i1 (NOT_Ps (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETGT)), + (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETUGT)), + (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)), + IntRegs:$tval, IntRegs:$fval))>; + + + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETULT)), + (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs, + (ADD_ri IntRegs:$rhs, -1)))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETLT)), + (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs, + (ADD_ri IntRegs:$rhs, -1)))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETLE)), + (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETULE)), + (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + + +// +// selectcc mappings for greater-equal-to Rs => greater-than Rs-1. 
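The SETGE and SETUGE patterns that follow rely on the identity x >= y <=> x > y - 1. A quick signed spot check in C++ (the y == INT_MIN wrap-around case is deliberately outside this sketch):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int32_t x = -4; x <= 4; ++x)
      for (int32_t y = -4; y <= 4; ++y)
        assert((x >= y) == (x > y - 1));  // the rewrite used for SETGE below
    return 0;
  }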
+// +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETGE)), + (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETUGE)), + (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))), + IntRegs:$tval, IntRegs:$fval))>; + + + +// +// selectcc mappings for predicate comparisons. +// +// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into: +// pt = not(p1 xor p2) +// Rd = mux(pt, true_val, false_val) +// and similarly for SETNE +// +def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETNE)), + (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval, + IntRegs:$fval))>; + +def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETEQ)), + (i32 (MUX_rr (i1 (NOT_pp (XOR_pp PredRegs:$lhs, PredRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + + +// +// selectcc mappings for 64-bit operands are messy. Hexagon does not have a +// MUX64 o, use this: +// selectcc(Rss, Rdd, tval, fval, cond) -> +// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi), +// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo)) + +// setgt-64. +def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval, + DoubleRegs:$fval, SETGT)), + (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)), + (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>; + + +// setlt-64 -> setgt-64. +def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval, + DoubleRegs:$fval, SETLT)), + (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, + (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)), + (MUX_rr (CMPGT64rr DoubleRegs:$lhs, + (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>; diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp new file mode 100644 index 0000000..a52c604 --- /dev/null +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -0,0 +1,46 @@ +//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HexagonSelectionDAGInfo class. 
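The 64-bit selectcc patterns above build the result from two 32-bit muxes that share one predicate and are then recombined. A C++ mirror of that decomposition (the predicate is hard-coded only so the sketch runs stand-alone):

  #include <cassert>
  #include <cstdint>

  int main() {
    int64_t tval = 0x0102030405060708LL;
    int64_t fval = 0x1112131415161718LL;
    bool p = false;                                    // stands in for cmp.gt(Rss, Rdd)
    uint32_t lo = p ? (uint32_t)tval : (uint32_t)fval;                   // mux on subreg_loreg
    uint32_t hi = p ? (uint32_t)(tval >> 32) : (uint32_t)(fval >> 32);   // mux on subreg_hireg
    int64_t res = (int64_t)(((uint64_t)hi << 32) | lo);                  // COMBINE_rr
    assert(res == (p ? tval : fval));
    return 0;
  }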
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-selectiondag-info" +#include "HexagonTargetMachine.h" +using namespace llvm; + +bool llvm::flag_aligned_memcpy; + +HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine + &TM) + : TargetSelectionDAGInfo(TM) { +} + +HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() { +} + +SDValue +HexagonSelectionDAGInfo:: +EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + flag_aligned_memcpy = false; + if ((Align & 0x3) == 0) { + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + uint64_t SizeVal = ConstantSize->getZExtValue(); + if ((SizeVal > 32) && ((SizeVal % 8) == 0)) + flag_aligned_memcpy = true; + } + } + + return SDValue(); +} diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h new file mode 100644 index 0000000..86fa026 --- /dev/null +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -0,0 +1,40 @@ +//=-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Hexagon subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef HexagonSELECTIONDAGINFO_H +#define HexagonSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class HexagonTargetMachine; + +class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM); + ~HexagonSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; +}; + +} + +#endif diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp new file mode 100644 index 0000000..f4d3647 --- /dev/null +++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp @@ -0,0 +1,136 @@ +//===---- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// +//===----------------------------------------------------------------------===//// +// This pass tries to provide opportunities for better optimization of muxes. +// The default code generated for something like: flag = (a == b) ? 1 : 3; +// would be: +// +// {p0 = cmp.eq(r0,r1)} +// {r3 = mux(p0,#1,#3)} +// +// This requires two packets. If we use .new predicated immediate transfers, +// then we can do this in a single packet, e.g.: +// +// {p0 = cmp.eq(r0,r1) +// if (p0.new) r3 = #1 +// if (!p0.new) r3 = #3} +// +// Note that the conditional assignments are not generated in .new form here. +// We assume opptimisically that they will be formed later. 
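In C terms, the split this pass performs replaces the mux with a pair of complementary predicated assignments. A tiny self-contained illustration of the equivalence (the register names in the comments only echo the example above):

  #include <cassert>

  int main() {
    int a = 2, b = 3, flag;
    bool p0 = (a == b);                   // p0 = cmp.eq(r0, r1)
    if (p0)  flag = 1;                    // if (p0) r3 = #1
    if (!p0) flag = 3;                    // if (!p0) r3 = #3
    assert(flag == ((a == b) ? 1 : 3));   // same result as the original mux
    return 0;
  }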
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "xfer" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" +#include <map> +#include <iostream> + +#include "llvm/Support/CommandLine.h" +#define DEBUG_TYPE "xfer" + + +using namespace llvm; + +namespace { + +class HexagonSplitTFRCondSets : public MachineFunctionPass { + HexagonTargetMachine& QTM; + const HexagonSubtarget &QST; + + public: + static char ID; + HexagonSplitTFRCondSets(HexagonTargetMachine& TM) : + MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {} + + const char *getPassName() const { + return "Hexagon Split TFRCondSets"; + } + bool runOnMachineFunction(MachineFunction &Fn); +}; + + +char HexagonSplitTFRCondSets::ID = 0; + + +bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { + + const TargetInstrInfo *TII = QTM.getInstrInfo(); + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + // Traverse the basic block. 
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (Opc == Hexagon::TFR_condset_rr) { + + int DestReg = MI->getOperand(0).getReg(); + int SrcReg1 = MI->getOperand(2).getReg(); + int SrcReg2 = MI->getOperand(3).getReg(); + + // Minor optimization: do not emit the predicated copy if the source and + // the destination is the same register + if (DestReg != SrcReg1) { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cPt), + DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1); + } + if (DestReg != SrcReg2) { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cNotPt), + DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2); + } + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::TFR_condset_ii) { + int DestReg = MI->getOperand(0).getReg(); + int SrcReg1 = MI->getOperand(1).getReg(); + int Immed1 = MI->getOperand(2).getImm(); + int Immed2 = MI->getOperand(3).getImm(); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cPt), + DestReg).addReg(SrcReg1).addImm(Immed1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cNotPt), + DestReg).addReg(SrcReg1).addImm(Immed2); + MII = MBB->erase(MI); + --MII; + } + } + } + + return true; +} + +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) { + return new HexagonSplitTFRCondSets(TM); +} diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp new file mode 100644 index 0000000..83fb498 --- /dev/null +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -0,0 +1,59 @@ +//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Hexagon specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "HexagonSubtarget.h" +#include "Hexagon.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "HexagonGenSubtargetInfo.inc" + +static cl::opt<bool> +EnableV3("enable-hexagon-v3", cl::Hidden, + cl::desc("Enable Hexagon V3 instructions.")); + +static cl::opt<bool> +EnableMemOps( + "enable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, + cl::desc("Generate V4 MEMOP in code generation for Hexagon target")); + +HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): + HexagonGenSubtargetInfo(TT, CPU, FS), + HexagonArchVersion(V1), + CPUString(CPU.str()) { + ParseSubtargetFeatures(CPU, FS); + + switch(HexagonArchVersion) { + case HexagonSubtarget::V2: + break; + case HexagonSubtarget::V3: + EnableV3 = true; + break; + case HexagonSubtarget::V4: + break; + default: + llvm_unreachable("Unknown Architecture Version."); + } + + // Initialize scheduling itinerary for the specified CPU. 
+ InstrItins = getInstrItineraryForCPU(CPUString); + + if (EnableMemOps) + UseMemOps = true; + else + UseMemOps = false; +} diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h new file mode 100644 index 0000000..6de85df --- /dev/null +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -0,0 +1,74 @@ +//==-- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef Hexagon_SUBTARGET_H +#define Hexagon_SUBTARGET_H + +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "HexagonGenSubtargetInfo.inc" + +#define Hexagon_SMALL_DATA_THRESHOLD 8 + +namespace llvm { + +class HexagonSubtarget : public HexagonGenSubtargetInfo { + + bool UseMemOps; + +public: + enum HexagonArchEnum { + V1, V2, V3, V4 + }; + + HexagonArchEnum HexagonArchVersion; + std::string CPUString; + InstrItineraryData InstrItins; + +public: + HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS); + + /// getInstrItins - Return the instruction itineraies based on subtarget + /// selection. + const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + bool hasV2TOps () const { return HexagonArchVersion >= V2; } + bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; } + bool hasV3TOps () const { return HexagonArchVersion >= V3; } + bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; } + bool hasV4TOps () const { return HexagonArchVersion >= V4; } + bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; } + + bool isSubtargetV2() const { return HexagonArchVersion == V2;} + const std::string &getCPUString () const { return CPUString; } + + // Threshold for small data section + unsigned getSmallDataThreshold() const { + return Hexagon_SMALL_DATA_THRESHOLD; + } + const HexagonArchEnum &getHexagonArchVersion() const { + return HexagonArchVersion; + } +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp new file mode 100644 index 0000000..b29e92c --- /dev/null +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -0,0 +1,118 @@ +//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetMachine.h" +#include "Hexagon.h" +#include "HexagonISelLowering.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/TargetRegistry.h" +#include <iostream> + +using namespace llvm; + +static cl:: +opt<bool> DisableHardwareLoops( + "disable-hexagon-hwloops", cl::Hidden, + cl::desc("Disable Hardware Loops for Hexagon target")); + +/// HexagonTargetMachineModule - Note that this is used on hosts that +/// cannot link in a library unless there are references into the +/// library. In particular, it seems that it is not possible to get +/// things to work on Win32 without this. Though it is unused, do not +/// remove it. +extern "C" int HexagonTargetMachineModule; +int HexagonTargetMachineModule = 0; + +extern "C" void LLVMInitializeHexagonTarget() { + // Register the target. + RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget); +} + + +/// HexagonTargetMachine ctor - Create an ILP32 architecture model. +/// + +/// Hexagon_TODO: Do I need an aggregate alignment? +/// +HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + TargetOptions Options, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0") , + Subtarget(TT, CPU, FS), TLInfo(*this), InstrInfo(Subtarget), + TSInfo(*this), + FrameLowering(Subtarget), + InstrItins(&Subtarget.getInstrItineraryData()) { + setMCUseCFI(false); +} + +// addPassesForOptimizations - Allow the backend (target) to add Target +// Independent Optimization passes to the Pass Manager. +bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) { + + PM.add(createConstantPropagationPass()); + PM.add(createLoopSimplifyPass()); + PM.add(createDeadCodeEliminationPass()); + PM.add(createConstantPropagationPass()); + PM.add(createLoopUnrollPass()); + PM.add(createLoopStrengthReducePass(getTargetLowering())); + return true; +} + +bool HexagonTargetMachine::addInstSelector(PassManagerBase &PM) { + PM.add(createHexagonRemoveExtendOps(*this)); + PM.add(createHexagonISelDag(*this)); + return false; +} + + +bool HexagonTargetMachine::addPreRegAlloc(PassManagerBase &PM) { + if (!DisableHardwareLoops) { + PM.add(createHexagonHardwareLoops()); + } + + return false; +} + +bool HexagonTargetMachine::addPostRegAlloc(PassManagerBase &PM) { + PM.add(createHexagonCFGOptimizer(*this)); + return true; +} + + +bool HexagonTargetMachine::addPreSched2(PassManagerBase &PM) { + PM.add(createIfConverterPass()); + return true; +} + +bool HexagonTargetMachine::addPreEmitPass(PassManagerBase &PM) { + + if (!DisableHardwareLoops) { + PM.add(createHexagonFixupHwLoops()); + } + + // Expand Spill code for predicate registers. + PM.add(createHexagonExpandPredSpillCode(*this)); + + // Split up TFRcondsets into conditional transfers. 
+ PM.add(createHexagonSplitTFRCondSets(*this)); + + return false; +} diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h new file mode 100644 index 0000000..e27d3ae --- /dev/null +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -0,0 +1,86 @@ +//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef HexagonTARGETMACHINE_H +#define HexagonTARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonISelLowering.h" +#include "HexagonSelectionDAGInfo.h" +#include "HexagonFrameLowering.h" + +namespace llvm { + +class Module; + +class HexagonTargetMachine : public LLVMTargetMachine { + const TargetData DataLayout; // Calculates type size & alignment. + HexagonSubtarget Subtarget; + HexagonTargetLowering TLInfo; + HexagonInstrInfo InstrInfo; + HexagonSelectionDAGInfo TSInfo; + HexagonFrameLowering FrameLowering; + const InstrItineraryData* InstrItins; + +public: + HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, + StringRef FS, TargetOptions Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL); + + virtual const HexagonInstrInfo *getInstrInfo() const { + return &InstrInfo; + } + virtual const HexagonSubtarget *getSubtargetImpl() const { + return &Subtarget; + } + virtual const HexagonRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + + virtual const InstrItineraryData* getInstrItineraryData() const { + return InstrItins; + } + + + virtual const HexagonTargetLowering* getTargetLowering() const { + return &TLInfo; + } + + virtual const HexagonFrameLowering* getFrameLowering() const { + return &FrameLowering; + } + + virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + + virtual const TargetData *getTargetData() const { return &DataLayout; } + static unsigned getModuleMatchQuality(const Module &M); + + // Pass Pipeline Configuration. + virtual bool addPassesForOptimizations(PassManagerBase &PM); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); + virtual bool addPreRegAlloc(llvm::PassManagerBase &PM); + virtual bool addPostRegAlloc(PassManagerBase &PM); + virtual bool addPreSched2(PassManagerBase &PM); +}; + +extern bool flag_aligned_memcpy; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp new file mode 100644 index 0000000..188337d --- /dev/null +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -0,0 +1,94 @@ +//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HexagonTargetAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Target/TargetData.h" +#include "llvm/DerivedTypes.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/CommandLine.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetObjectFile.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold", + cl::init(8), cl::Hidden); + +void HexagonTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + + SmallDataSection = + getContext().getELFSection(".sdata", ELF::SHT_PROGBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getDataRel()); + SmallBSSSection = + getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getBSS()); +} + +// sdata/sbss support taken largely from the MIPS Backend. +static bool IsInSmallSection(uint64_t Size) { + return Size > 0 && Size <= (uint64_t)SmallDataThreshold; +} +/// IsGlobalInSmallSection - Return true if this global value should be +/// placed into small data/bss section. +bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const { + // If the primary definition of this global value is outside the current + // translation unit or the global value is available for inspection but not + // emission, then do nothing. + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return false; + + // Otherwise, Check if GV should be in sdata/sbss, when normally it would end + // up in getKindForGlobal(GV, TM). + return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); +} + +/// IsGlobalInSmallSection - Return true if this global value should be +/// placed into small data/bss section. +bool HexagonTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, + SectionKind Kind) const { + // Only global variables, not functions. + const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); + if (!GVA) + return false; + + if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) { + Type *Ty = GV->getType()->getElementType(); + return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); + } + + return false; +} + +const MCSection *HexagonTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + // Handle Small Section classification here. + if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallBSSSection; + if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); +} diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h new file mode 100644 index 0000000..101c1f2 --- /dev/null +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -0,0 +1,40 @@ +//===-- HexagonTargetAsmInfo.h - Hexagon asm properties ---------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
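With the default -hexagon-small-data-threshold of 8, only globals of one to eight bytes are eligible for .sdata/.sbss. A stand-alone C++ mirror of just the size predicate above (the SectionKind and linkage checks are omitted):

  #include <cstdint>
  #include <cstdio>

  static bool IsInSmallSection(uint64_t Size, uint64_t Threshold = 8) {
    return Size > 0 && Size <= Threshold;
  }

  int main() {
    printf("4-byte int      -> %s\n", IsInSmallSection(4)  ? ".sdata/.sbss" : "default section");
    printf("40-byte int[10] -> %s\n", IsInSmallSection(40) ? ".sdata/.sbss" : "default section");
    return 0;
  }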
+// +//===----------------------------------------------------------------------===// + +#ifndef HexagonTARGETOBJECTFILE_H +#define HexagonTARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + + class HexagonTargetObjectFile : public TargetLoweringObjectFileELF { + const MCSectionELF *SmallDataSection; + const MCSectionELF *SmallBSSSection; + public: + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + /// IsGlobalInSmallSection - Return true if this global address should be + /// placed into small data/bss section. + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM, + SectionKind Kind) const; + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const; + + const MCSection* SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler *Mang, + const TargetMachine &TM) const; + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h new file mode 100644 index 0000000..21b2d67 --- /dev/null +++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h @@ -0,0 +1,141 @@ +//==-- HexagonVarargsCallingConvention.h - Calling Conventions ---*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the functions that assign locations to outgoing function +// arguments. Adapted from the target independent version but this handles +// calls to varargs functions +// +//===----------------------------------------------------------------------===// +// + + + + +static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, + Hexagon_CCState &State, + int NonVarArgsParams, + int CurrentParam, + bool ForceMem); + + +static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, + Hexagon_CCState &State, + int NonVarArgsParams, + int CurrentParam, + bool ForceMem) { + unsigned ByValSize = 0; + if (ArgFlags.isByVal() && + ((ByValSize = ArgFlags.getByValSize()) > + (MVT(MVT::i64).getSizeInBits() / 8))) { + ForceMem = true; + } + + + // Only assign registers for named (non varargs) arguments + if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <= + NonVarArgsParams))) { + + if (LocVT == MVT::i32 || + LocVT == MVT::i16 || + LocVT == MVT::i8 || + LocVT == MVT::f32) { + static const unsigned RegList1[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5 + }; + if (unsigned Reg = State.AllocateReg(RegList1, 6)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, + LocVT.getSimpleVT(), LocInfo)); + return false; + } + } + + if (LocVT == MVT::i64 || + LocVT == MVT::f64) { + static const unsigned RegList2[] = { + Hexagon::D0, Hexagon::D1, Hexagon::D2 + }; + if (unsigned Reg = State.AllocateReg(RegList2, 3)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, + LocVT.getSimpleVT(), LocInfo)); + return false; + } + } + } + + const Type* ArgTy = LocVT.getTypeForEVT(State.getContext()); + unsigned Alignment = + State.getTarget().getTargetData()->getABITypeAlignment(ArgTy); + unsigned Size = + 
State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8; + + // If it's passed by value, then we need the size of the aggregate not of + // the pointer. + if (ArgFlags.isByVal()) { + Size = ByValSize; + + // Hexagon_TODO: Get the alignment of the contained type here. + Alignment = 8; + } + + unsigned Offset3 = State.AllocateStack(Size, Alignment); + State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3, + LocVT.getSimpleVT(), LocInfo)); + return false; +} + + +static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, + Hexagon_CCState &State, + int NonVarArgsParams, + int CurrentParam, + bool ForceMem) { + + if (LocVT == MVT::i32 || + LocVT == MVT::f32) { + static const unsigned RegList1[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5 + }; + if (unsigned Reg = State.AllocateReg(RegList1, 6)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, + LocVT.getSimpleVT(), LocInfo)); + return false; + } + } + + if (LocVT == MVT::i64 || + LocVT == MVT::f64) { + static const unsigned RegList2[] = { + Hexagon::D0, Hexagon::D1, Hexagon::D2 + }; + if (unsigned Reg = State.AllocateReg(RegList2, 3)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, + LocVT.getSimpleVT(), LocInfo)); + return false; + } + } + + const Type* ArgTy = LocVT.getTypeForEVT(State.getContext()); + unsigned Alignment = + State.getTarget().getTargetData()->getABITypeAlignment(ArgTy); + unsigned Size = + State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8; + + unsigned Offset3 = State.AllocateStack(Size, Alignment); + State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3, + LocVT.getSimpleVT(), LocInfo)); + return false; +} diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt new file mode 100644 index 0000000..84ea6a0 --- /dev/null +++ b/lib/Target/Hexagon/LLVMBuild.txt @@ -0,0 +1,32 @@ +;===- ./lib/Target/Hexagon/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = TargetInfo MCTargetDesc + +[component_0] +type = TargetGroup +name = Hexagon +parent = Target +has_asmprinter = 1 + +[component_1] +type = Library +name = HexagonCodeGen +parent = Hexagon +required_libraries = AsmPrinter CodeGen Core HexagonInfo SelectionDAG Support Target MC HexagonDesc +add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..8e3da99 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMHexagonDesc + HexagonMCTargetDesc.cpp + HexagonMCAsmInfo.cpp + ) + +add_dependencies(LLVMHexagonDesc HexagonCommonTableGen) diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp new file mode 100644 index 0000000..188693c --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -0,0 +1,36 @@ +//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HexagonMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCAsmInfo.h" + +using namespace llvm; + +HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) { + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; // .xword is only supported by V9. + ZeroDirective = "\t.skip\t"; + CommentString = "//"; + HasLEB128 = true; + + PrivateGlobalPrefix = ".L"; + LCOMMDirectiveType = LCOMM::ByteAlignment; + InlineAsmStart = "# InlineAsm Start"; + InlineAsmEnd = "# InlineAsm End"; + ZeroDirective = "\t.space\t"; + AscizDirective = "\t.string\t"; + WeakRefDirective = "\t.weak\t"; + + UsesELFSectionDirectiveForBSS = true; + ExceptionsType = ExceptionHandling::DwarfCFI; +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h new file mode 100644 index 0000000..8196e95 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- HexagonTargetAsmInfo.h - Hexagon asm properties ---------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the HexagonMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HexagonMCASMINFO_H +#define HexagonMCASMINFO_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + + class HexagonMCAsmInfo : public MCAsmInfo { + public: + explicit HexagonMCAsmInfo(const Target &T, StringRef TT); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp new file mode 100644 index 0000000..625f07c --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -0,0 +1,94 @@ +//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Hexagon specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCTargetDesc.h" +#include "HexagonMCAsmInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "HexagonGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "HexagonGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "HexagonGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createHexagonMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitHexagonMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitHexagonMCRegisterInfo(X, Hexagon::R0); + return X; +} + +static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitHexagonMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCAsmInfo *createHexagonMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new HexagonMCAsmInfo(T, TT); + + // VirtualFP = (R30 + #0). + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(Hexagon::R30, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + // For the time being, use static relocations, since there's really no + // support for PIC yet. + X->InitMCCodeGenInfo(Reloc::Static, CM, OL); + return X; +} + +// Force static initialization. +extern "C" void LLVMInitializeHexagonTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget, + createHexagonMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, createHexagonMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheHexagonTarget, + createHexagonMCRegisterInfo); + + // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget, + createHexagonMCSubtargetInfo); +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h new file mode 100644 index 0000000..364841f --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -0,0 +1,40 @@ +//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Hexagon specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCTARGETDESC_H +#define HEXAGONMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheHexagonTarget; + +} // End llvm namespace + +// Define symbolic names for Hexagon registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "HexagonGenRegisterInfo.inc" + +// Defines symbolic names for the Hexagon instructions. +// +#define GET_INSTRINFO_ENUM +#include "HexagonGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "HexagonGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..1114d99 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HexagonDesc +parent = Hexagon +required_libraries = HexagonInfo MC +add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile new file mode 100644 index 0000000..67be2bc --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Hexagon/MCTargetDesc/Makefile ------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMHexagonDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile new file mode 100644 index 0000000..c936e92 --- /dev/null +++ b/lib/Target/Hexagon/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/Hexagon/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License.
See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMHexagonCodeGen +TARGET = Hexagon + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = HexagonGenRegisterInfo.inc \ + HexagonGenInstrInfo.inc \ + HexagonGenAsmWriter.inc \ + HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \ + HexagonGenCallingConv.inc \ + HexagonAsmPrinter.cpp + +DIRS = TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/TargetInfo/CMakeLists.txt b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..5b04a30 --- /dev/null +++ b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMHexagonInfo + HexagonTargetInfo.cpp + ) + +add_dependencies(LLVMHexagonInfo HexagonCommonTableGen) diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp new file mode 100644 index 0000000..7aa5dd3 --- /dev/null +++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp @@ -0,0 +1,19 @@ +//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "llvm/Module.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheHexagonTarget; + +extern "C" void LLVMInitializeHexagonTargetInfo() { + RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget, "hexagon", "Hexagon"); +} diff --git a/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..7b87be3 --- /dev/null +++ b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Hexagon/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HexagonInfo +parent = Hexagon +required_libraries = MC Support +add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/TargetInfo/Makefile b/lib/Target/Hexagon/TargetInfo/Makefile new file mode 100644 index 0000000..494cca1 --- /dev/null +++ b/lib/Target/Hexagon/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/Hexagon/TargetInfo/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMHexagonInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 358cbc8..5a42ca5 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = ARM CBackend CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore + ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the ; interpreter). diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt index ec8f52a..813767b 100644 --- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt +++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt @@ -6,11 +6,4 @@ add_llvm_library(LLVMMBlazeAsmParser MBlazeAsmParser.cpp ) -add_llvm_library_dependencies(LLVMMBlazeAsmParser - LLVMMBlazeInfo - LLVMMC - LLVMMCParser - LLVMSupport - ) - add_dependencies(LLVMMBlazeAsmParser MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt index 2c61a7f..b10189a 100644 --- a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt +++ b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeAsmParser parent = MBlaze required_libraries = MBlazeInfo MC MCParser Support add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index d3f1383..71095e5 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -29,19 +29,6 @@ add_llvm_target(MBlazeCodeGen MBlazeELFWriterInfo.cpp ) -add_llvm_library_dependencies(LLVMMBlazeCodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMBlazeAsmPrinter - LLVMMBlazeDesc - LLVMMBlazeInfo - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt index e0a53ee..be2dce1 100644 --- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt +++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt @@ -13,11 +13,4 @@ set_property( ) endif() -add_llvm_library_dependencies(LLVMMBlazeDisassembler - LLVMMBlazeDesc - LLVMMBlazeInfo - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMBlazeDisassembler MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt index c5c4f80..28dd9dc 100644 --- a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt +++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeDisassembler parent = MBlaze required_libraries = MBlazeDesc MBlazeInfo MC Support add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index 3087317..ccc3a05 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -123,6 +123,7 @@ static unsigned decodeSEXT(uint32_t insn) { case 0x41: return MBlaze::SRL; case 0x21: return MBlaze::SRC; case 0x01: return MBlaze::SRA; + case 0xE0: return MBlaze::CLZ; } } @@ -176,6 +177,13 @@ static unsigned decodeBR(uint32_t insn) { } static unsigned decodeBRI(uint32_t insn) { + switch (insn&0x3FFFFFF) { + default: break; + case 0x0020004: return MBlaze::IDMEMBAR; + case 0x0220004: return MBlaze::DMEMBAR; + case 0x0420004: 
return MBlaze::IMEMBAR; + } + switch ((insn>>16)&0x1F) { default: return UNSUPPORTED; case 0x00: return MBlaze::BRI; @@ -531,6 +539,9 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr, default: return Fail; + case MBlazeII::FC: + break; + case MBlazeII::FRRRR: if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) return Fail; @@ -547,6 +558,13 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr, instr.addOperand(MCOperand::CreateReg(RB)); break; + case MBlazeII::FRR: + if (RD == UNSUPPORTED || RA == UNSUPPORTED) + return Fail; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); + break; + case MBlazeII::FRI: switch (opcode) { default: diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt index aff0b3d..586e2d3 100644 --- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt +++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt @@ -5,9 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter MBlazeInstPrinter.cpp ) -add_llvm_library_dependencies(LLVMMBlazeAsmPrinter - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt index 7a21f1e..3a21a05 100644 --- a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt +++ b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeAsmPrinter parent = MBlaze required_libraries = MC Support add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index 570ab08..5297563 100644 --- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -1,4 +1,4 @@ -//===-- MBLazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===// +//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt index f1a3f5d..0b29007 100644 --- a/lib/Target/MBlaze/LLVMBuild.txt +++ b/lib/Target/MBlaze/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = MBlaze @@ -29,4 +32,3 @@ name = MBlazeCodeGen parent = MBlaze required_libraries = AsmPrinter CodeGen Core MBlazeAsmPrinter MBlazeDesc MBlazeInfo MC SelectionDAG Support Target add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index ff051e3..c751dd8 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -310,9 +310,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // Check if the last terminator is an unconditional branch. MachineBasicBlock::const_iterator I = Pred->end(); - while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + while (I != Pred->begin() && !(--I)->isTerminator()) ; // Noop - return I == Pred->end() || !I->getDesc().isBarrier(); + return I == Pred->end() || !I->isBarrier(); } // Force static initialization. 
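The MBlazeAsmPrinter hunk above is one instance of a change that recurs across the MBlaze, MSP430 and Mips diffs in this patch: predicates such as isTerminator, isBarrier, isBranch, mayLoad, mayStore and hasDelaySlot are now queried on the MachineInstr itself instead of going through MI->getDesc(). For an ordinary, unbundled instruction the two spellings give the same answer, since the MachineInstr accessors forward to the MCInstrDesc flags; the new form additionally works for instruction bundles. A minimal sketch of the equivalence, using a hypothetical helper name:

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Hypothetical helper: can control fall through past MI?
static bool fallsThrough(const MachineInstr *MI) {
  bool OldForm = !MI->getDesc().isBarrier(); // pre-patch spelling
  bool NewForm = !MI->isBarrier();           // post-patch spelling
  assert(OldForm == NewForm && "identical for unbundled instructions");
  return NewForm;
}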
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp index c07570a..19e787d 100644 --- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -29,13 +29,11 @@ using namespace llvm; STATISTIC(FilledSlots, "Number of delay slots filled"); -namespace llvm { -cl::opt<bool> DisableDelaySlotFiller( +static cl::opt<bool> MBDisableDelaySlotFiller( "disable-mblaze-delay-filler", cl::init(false), cl::desc("Disable the MBlaze delay slot filter."), cl::Hidden); -} namespace { struct Filler : public MachineFunctionPass { @@ -109,7 +107,6 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate, // Hazard check MachineBasicBlock::iterator a = candidate; MachineBasicBlock::iterator b = slot; - MCInstrDesc desc = candidate->getDesc(); // MBB layout:- // candidate := a0 = operation(a1, a2) @@ -123,7 +120,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate, // 4. b0 is one or more of {a1, a2} // 5. a accesses memory, and the middle bit // contains a store operation. - bool a_is_memory = desc.mayLoad() || desc.mayStore(); + bool a_is_memory = candidate->mayLoad() || candidate->mayStore(); // Determine the number of operands in the slot instruction and in the // candidate instruction. @@ -156,7 +153,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate, } // Check hazard type 5 - if (a_is_memory && m->getDesc().mayStore()) + if (a_is_memory && m->mayStore()) return true; } @@ -183,8 +180,8 @@ static bool isDelayFiller(MachineBasicBlock &MBB, if (candidate == MBB.begin()) return false; - MCInstrDesc brdesc = (--candidate)->getDesc(); - return (brdesc.hasDelaySlot()); + --candidate; + return (candidate->hasDelaySlot()); } static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) { @@ -211,9 +208,8 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) { break; --I; - MCInstrDesc desc = I->getDesc(); - if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) || - desc.isCall() || desc.isReturn() || desc.isBarrier() || + if (I->hasDelaySlot() || I->isBranch() || isDelayFiller(MBB,I) || + I->isCall() || I->isReturn() || I->isBarrier() || hasUnknownSideEffects(I)) break; @@ -232,11 +228,11 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) { bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->getDesc().hasDelaySlot()) { + if (I->hasDelaySlot()) { MachineBasicBlock::iterator D = MBB.end(); MachineBasicBlock::iterator J = I; - if (!DisableDelaySlotFiller) + if (!MBDisableDelaySlotFiller) D = findDelayInstr(MBB,I); ++FilledSlots; diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index f28d5a7..37919bc 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -32,13 +32,11 @@ using namespace llvm; -namespace llvm { - cl::opt<bool> DisableStackAdjust( - "disable-mblaze-stack-adjust", - cl::init(false), - cl::desc("Disable MBlaze stack layout adjustment."), - cl::Hidden); -} +static cl::opt<bool> MBDisableStackAdjust( + "disable-mblaze-stack-adjust", + cl::init(false), + cl::desc("Disable MBlaze stack layout adjustment."), + cl::Hidden); static void replaceFrameIndexes(MachineFunction &MF, SmallVector<std::pair<int,int64_t>, 16> &FR) { @@ -85,7 +83,7 @@ static void replaceFrameIndexes(MachineFunction &MF, 
//===----------------------------------------------------------------------===// static void analyzeFrameIndexes(MachineFunction &MF) { - if (DisableStackAdjust) return; + if (MBDisableStackAdjust) return; MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); @@ -336,7 +334,8 @@ int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) // if frame pointer elimination is disabled. bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects(); } void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const { diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 148d906..0002174 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -167,7 +167,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); setOperationAction(ISD::CTLZ, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td index 54f605f..4c6034d 100644 --- a/lib/Target/MBlaze/MBlazeInstrFormats.td +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -35,6 +35,7 @@ def FRIR : Format<17>; // RSUBI def FRRRR : Format<18>; // RSUB, FRSUB def FRI : Format<19>; // RSUB, FRSUB def FC : Format<20>; // NOP +def FRR : Format<21>; // CLZ //===----------------------------------------------------------------------===// // Describe MBlaze instructions format @@ -202,3 +203,26 @@ class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr, let Inst{11-16} = flags; let Inst{17-31} = imm15; } + +//===----------------------------------------------------------------------===// +// TCLZ instruction class in MBlaze : <|opcode|rd|imm15|> +//===----------------------------------------------------------------------===// +class TCLZ<bits<6> op, bits<16> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<op, FRR, outs, ins, asmstr, pattern, itin> { + bits<5> rd; + bits<5> ra; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-31} = flags; +} + +//===----------------------------------------------------------------------===// +// MBAR instruction class in MBlaze : <|opcode|rd|imm15|> +//===----------------------------------------------------------------------===// +class MBAR<bits<6> op, bits<26> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<op, FC, outs, ins, asmstr, pattern, itin> { + let Inst{6-31} = flags; +} diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 1d8c987..9fe2a49 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -594,9 +594,18 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, //===----------------------------------------------------------------------===// let neverHasSideEffects = 1 in { - def 
NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>; + def NOP : MBlazeInst<0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>; } +let Predicates=[HasPatCmp] in { + def CLZ : TCLZ<0x24, 0x00E0, (outs GPR:$dst), (ins GPR:$src), + "clz $dst, $src", [], IIC_ALU>; +} + +def IMEMBAR : MBAR<0x2E, 0x0420004, (outs), (ins), "mbar 2", [], IIC_ALU>; +def DMEMBAR : MBAR<0x2E, 0x0220004, (outs), (ins), "mbar 1", [], IIC_ALU>; +def IDMEMBAR : MBAR<0x2E, 0x0020004, (outs), (ins), "mbar 0", [], IIC_ALU>; + let usesCustomInserter = 1 in { def Select_CC : MBlazePseudo<(outs GPR:$dst), (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed @@ -751,6 +760,56 @@ def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>; def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>; // SET_CC operations +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETEQ), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 1)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETNE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 2)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 3)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 4)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 5)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 6)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU (i32 R0), GPR:$L), 3)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU (i32 R0), GPR:$L), 4)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU (i32 R0), GPR:$L), 5)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU (i32 R0), GPR:$L), 6)>; + +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETEQ), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 1)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETNE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 2)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 3)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 4)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 5)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 6)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, (i32 R0)), 3)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, (i32 R0)), 4)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, (i32 R0)), 5)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, (i32 R0)), 6)>; + def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), (CMP GPR:$R, GPR:$L), 1)>; @@ -787,6 +846,68 @@ def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)), (Select_CC GPR:$T, GPR:$F, 
GPR:$C, 2)>; // SELECT_CC +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETEQ), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 1)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETNE), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 2)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETGT), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 3)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETLT), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 4)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETGE), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 5)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETLE), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 6)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETUGT), + (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 3)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETULT), + (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 4)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETUGE), + (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 5)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETULE), + (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 6)>; + +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETEQ), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 1)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETNE), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 2)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETGT), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 3)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETLT), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 4)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETGE), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 5)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETLE), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 6)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETUGT), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 3)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETULT), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 4)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETUGE), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 5)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETULE), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 6)>; + def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), (i32 GPR:$T), (i32 GPR:$F), SETEQ), (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>; @@ -827,6 +948,48 @@ def : Pat<(br bb:$T), (BRID bb:$T)>; def : Pat<(brind GPR:$T), (BRAD GPR:$T)>; // BRCOND instructions +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETEQ), bb:$T), + (BEQID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETNE), bb:$T), + (BNEID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGT), bb:$T), + (BGTID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLT), bb:$T), + (BLTID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGE), bb:$T), + (BGEID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLE), bb:$T), + (BLEID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGT), bb:$T), + (BGTID (CMPU (i32 R0), 
GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULT), bb:$T), + (BLTID (CMPU (i32 R0), GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGE), bb:$T), + (BGEID (CMPU (i32 R0), GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULE), bb:$T), + (BLEID (CMPU (i32 R0), GPR:$L), bb:$T)>; + +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETEQ), bb:$T), + (BEQID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETNE), bb:$T), + (BNEID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGT), bb:$T), + (BGTID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLT), bb:$T), + (BLTID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGE), bb:$T), + (BGEID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLE), bb:$T), + (BLEID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGT), bb:$T), + (BGTID (CMPU GPR:$R, (i32 R0)), bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULT), bb:$T), + (BLTID (CMPU GPR:$R, (i32 R0)), bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGE), bb:$T), + (BGEID (CMPU GPR:$R, (i32 R0)), bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULE), bb:$T), + (BLEID (CMPU GPR:$R, (i32 R0)), bb:$T)>; + def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T), (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>; def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T), @@ -869,11 +1032,11 @@ def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>; def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>; // 16-bit load and store -def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>; +def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$ad), (SH GPR:$dst, xaddr:$ad)>; def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>; // 8-bit load and store -def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>; +def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$ad), (SB GPR:$dst, xaddr:$ad)>; def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>; // Peepholes diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp index a7e400b..7e5598f 100644 --- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp +++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp @@ -1,4 +1,4 @@ -//===-- MBLazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===// +//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 4ad7bd6..5ed81dd 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -33,16 +33,16 @@ extern "C" void LLVMInitializeMBlazeTarget() { // an easier handling. 
MBlazeTargetMachine:: MBlazeTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL): - LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), - Subtarget(TT, CPU, FS), - DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), - InstrInfo(*this), - FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS), + DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), + InstrInfo(*this), + FrameLowering(Subtarget), + TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()) { } // Install an instruction selector pass using diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 1c1aa53..036f1b6 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -43,6 +43,7 @@ namespace llvm { public: MBlazeTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt index 37871b6..6fa7f43 100644 --- a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt @@ -5,11 +5,4 @@ add_llvm_library(LLVMMBlazeDesc MBlazeMCTargetDesc.cpp ) -add_llvm_library_dependencies(LLVMMBlazeDesc - LLVMMBlazeAsmPrinter - LLVMMBlazeInfo - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMBlazeDesc MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt index e89811b..4982f0f 100644 --- a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeDesc parent = MBlaze required_libraries = MBlazeAsmPrinter MBlazeInfo MC Support add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp index 08f7d46..d5acbe9 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp @@ -58,6 +58,11 @@ public: bool MayNeedRelaxation(const MCInst &Inst) const; + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; @@ -87,6 +92,18 @@ bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { return hasExprOrImm; } +bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // FIXME: Is this right? It's what the "generic" code was doing before, + // but is X86 specific. Is it actually true for MBlaze also, or was it + // just close enough to not be a big deal? + // + // Relax if the value is too big for a (signed) i8. 
+ return int64_t(Value) != int64_t(int8_t(Value)); +} + void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { Res = Inst; Res.setOpcode(getRelaxedOpcode(Inst.getOpcode())); diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h index 776dbc4..c8bdd6f 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h @@ -51,6 +51,7 @@ namespace MBlazeII { FRRRR, FRI, FC, + FRR, FormMask = 63 //===------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt index 93fce58..b554d9b 100644 --- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt +++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt @@ -5,10 +5,4 @@ add_llvm_library(LLVMMBlazeInfo MBlazeTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMMBlazeInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMMBlazeInfo MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt index 938a1d9..ba7ee5d 100644 --- a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt +++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeInfo parent = MBlaze required_libraries = MC Support Target add_to_library_groups = MBlaze - diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 55c2d7d..7daa7a2 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -22,19 +22,6 @@ add_llvm_target(MSP430CodeGen MSP430MCInstLower.cpp ) -add_llvm_library_dependencies(LLVMMSP430CodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMMSP430AsmPrinter - LLVMMSP430Desc - LLVMMSP430Info - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt index ce39d95..64ac994 100644 --- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt +++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt @@ -4,9 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter MSP430InstPrinter.cpp ) -add_llvm_library_dependencies(LLVMMSP430AsmPrinter - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen) diff --git a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt index aeb863a..37b8c25 100644 --- a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt +++ b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MSP430AsmPrinter parent = MSP430 required_libraries = MC Support add_to_library_groups = MSP430 - diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt index 024312b..51d9702 100644 --- a/lib/Target/MSP430/LLVMBuild.txt +++ b/lib/Target/MSP430/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = MSP430 @@ -27,4 +30,3 @@ name = MSP430CodeGen parent = MSP430 required_libraries = AsmPrinter CodeGen Core MC MSP430AsmPrinter MSP430Desc MSP430Info SelectionDAG Support Target add_to_library_groups = MSP430 - diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt index c2dd448..adc95c5 100644 --- 
a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt @@ -3,12 +3,4 @@ add_llvm_library(LLVMMSP430Desc MSP430MCAsmInfo.cpp ) -add_llvm_library_dependencies(LLVMMSP430Desc - LLVMMC - LLVMMSP430AsmPrinter - LLVMMSP430Info - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMMSP430Desc MSP430CommonTableGen) diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt index 1890e9d..3319d93 100644 --- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MSP430Desc parent = MSP430 required_libraries = MC MSP430AsmPrinter MSP430Info Support Target add_to_library_groups = MSP430 - diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index c99f4ab..e406ff2 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -29,7 +29,7 @@ using namespace llvm; bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (DisableFramePointerElim(MF) || + return (MF.getTarget().Options.DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects() || MFI->isFrameAddressTaken()); } @@ -140,7 +140,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF, while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); - if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator()) + if (Opc != MSP430::POP16r && !PI->isTerminator()) break; --MBBI; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 5c94137..884d69b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -122,8 +122,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::CTTZ, MVT::i8, Expand); setOperationAction(ISD::CTTZ, MVT::i16, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); setOperationAction(ISD::CTLZ, MVT::i8, Expand); setOperationAction(ISD::CTLZ, MVT::i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand); setOperationAction(ISD::CTPOP, MVT::i8, Expand); setOperationAction(ISD::CTPOP, MVT::i16, Expand); diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 81f766e..9d3c7e9 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -158,13 +158,12 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { } bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isTerminator()) return false; + if (!MI->isTerminator()) return false; // Conditional branch is a special case. - if (MCID.isBranch() && !MCID.isBarrier()) + if (MI->isBranch() && !MI->isBarrier()) return true; - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return true; return !isPredicated(MI); } @@ -189,7 +188,7 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // A terminator that isn't a branch can't easily be handled // by this analysis. - if (!I->getDesc().isBranch()) + if (!I->isBranch()) return true; // Cannot handle indirect branches. 
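The ISD::CTLZ_ZERO_UNDEF and ISD::CTTZ_ZERO_UNDEF actions added here (and in the MBlazeISelLowering hunk earlier) cover the new node variants whose result is undefined when the operand is zero, matching the semantics of __builtin_clz and __builtin_ctz; MSP430 marks both flavours Expand, exactly as it already did for the plain CTLZ/CTTZ nodes. A small sketch of the semantic difference on a 16-bit value, using hypothetical helper names:

#include <cassert>
#include <cstdint>

// ISD::CTLZ semantics: defined for every input; a zero operand yields the
// full bit width (16 here).
static unsigned ctlz16(uint16_t V) {
  unsigned N = 0;
  for (uint16_t Mask = 0x8000; Mask != 0 && (V & Mask) == 0; Mask >>= 1)
    ++N;
  return N;
}

// ISD::CTLZ_ZERO_UNDEF semantics: the caller guarantees V != 0, which lets a
// target pick a cheaper lowering that never special-cases zero.
static unsigned ctlz16ZeroUndef(uint16_t V) {
  assert(V != 0 && "result is undefined for a zero operand");
  return ctlz16(V);
}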
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index fe185fb..a0fc3da 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -28,9 +28,10 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index 4fb060f..28d482a 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -39,7 +39,7 @@ class MSP430TargetMachine : public LLVMTargetMachine { public: MSP430TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt index 1526946..f6b40ea 100644 --- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt +++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMMSP430Info MSP430TargetInfo.cpp ) -add_llvm_library_dependencies(LLVMMSP430Info - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMMSP430Info MSP430CommonTableGen) diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt index a745ea8..deafc2d 100644 --- a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt +++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MSP430Info parent = MSP430 required_libraries = MC Support Target add_to_library_groups = MSP430 - diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index ac9cfc0..a13c0e8 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -29,19 +29,6 @@ add_llvm_target(MipsCodeGen MipsSelectionDAGInfo.cpp ) -add_llvm_library_dependencies(LLVMMipsCodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMMipsAsmPrinter - LLVMMipsDesc - LLVMMipsInfo - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt index c45b35d..3e9fbf1 100644 --- a/lib/Target/Mips/InstPrinter/CMakeLists.txt +++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt @@ -4,9 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter MipsInstPrinter.cpp ) -add_llvm_library_dependencies(LLVMMipsAsmPrinter - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen) diff --git a/lib/Target/Mips/InstPrinter/LLVMBuild.txt b/lib/Target/Mips/InstPrinter/LLVMBuild.txt index d953a61..317057b 100644 --- a/lib/Target/Mips/InstPrinter/LLVMBuild.txt +++ b/lib/Target/Mips/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MipsAsmPrinter parent = Mips required_libraries = MC Support add_to_library_groups = Mips - diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index f544d39..3e9c46a 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -96,10 
+96,14 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { case MCSymbolRefExpr::VK_None: break; case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break; case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break; + case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break; case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break; case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break; case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break; case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break; + case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break; + case MCSymbolRefExpr::VK_Mips_DTPREL_HI:OS << "%dtprel_hi("; break; + case MCSymbolRefExpr::VK_Mips_DTPREL_LO:OS << "%dtprel_lo("; break; case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break; case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt index e733b52..bcd32bc 100644 --- a/lib/Target/Mips/LLVMBuild.txt +++ b/lib/Target/Mips/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = Mips @@ -28,4 +31,3 @@ name = MipsCodeGen parent = Mips required_libraries = AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target add_to_library_groups = Mips - diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt index 2ceb5c9..0eb0a55 100644 --- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -5,11 +5,4 @@ add_llvm_library(LLVMMipsDesc MipsMCTargetDesc.cpp ) -add_llvm_library_dependencies(LLVMMipsDesc - LLVMMC - LLVMMipsAsmPrinter - LLVMMipsInfo - LLVMSupport - ) - add_dependencies(LLVMMipsDesc MipsCommonTableGen) diff --git a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt index d6f5dd2..29f5da6 100644 --- a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MipsDesc parent = Mips required_libraries = MC MipsAsmPrinter MipsInfo Support add_to_library_groups = Mips - diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 7bc5fe4..60ff4fe 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -29,13 +29,19 @@ #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; +// Prepare value for the target space for it static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { // Add/subtract and shift switch (Kind) { default: + return 0; + case FK_GPRel_4: + case FK_Data_4: + case Mips::fixup_Mips_LO16: break; case Mips::fixup_Mips_PC16: // So far we are only using this type for branches. @@ -52,25 +58,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { // address range. 
Value >>= 2; break; - } - - // Mask off value for placement as an operand - switch (Kind) { - default: - break; - case FK_GPRel_4: - case FK_Data_4: - Value &= 0xffffffff; - break; - case Mips::fixup_Mips_26: - Value &= 0x03ffffff; - break; - case Mips::fixup_Mips_LO16: - case Mips::fixup_Mips_PC16: - Value &= 0x0000ffff; - break; case Mips::fixup_Mips_HI16: - Value >>= 16; + case Mips::fixup_Mips_GOT_Local: + // Get the higher 16-bits. Also add 1 if bit 15 is 1. + Value = (Value >> 16) + ((Value & 0x8000) != 0); break; } @@ -96,42 +87,40 @@ public: /// fixup kind as appropriate. void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { - unsigned Kind = (unsigned)Fixup.getKind(); - Value = adjustFixupValue(Kind, Value); + MCFixupKind Kind = Fixup.getKind(); + Value = adjustFixupValue((unsigned)Kind, Value); if (!Value) - return; // Doesn't change encoding. + return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); - switch (Kind) { - default: - llvm_unreachable("Unknown fixup kind!"); - case Mips::fixup_Mips_GOT16: // This will be fixed up at link time - break; - case FK_GPRel_4: - case FK_Data_4: - case Mips::fixup_Mips_26: - case Mips::fixup_Mips_LO16: - case Mips::fixup_Mips_PC16: - case Mips::fixup_Mips_HI16: - // For each byte of the fragment that the fixup touches, mask i - // the fixup value. The Value has been "split up" into the appr - // bitfields above. - for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes - Data[Offset + i] += uint8_t((Value >> (i * 8)) & 0xff); - break; + // FIXME: The below code will not work across endian models + // How many bytes/bits are we fixing up? + unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1; + uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1; + + // Grab current value, if any, from bits. + uint64_t CurVal = 0; + for (unsigned i = 0; i != NumBytes; ++i) + CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8); + + CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask); + + // Write out the bytes back to the code/data bits. + // First the unaffected bits and then the fixup. + for (unsigned i = 0; i != NumBytes; ++i) { + Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff); } - } +} unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = { - // This table *must* be in the order that the fixup_* kinds a + // This table *must* be in same the order of fixup_* kinds in // MipsFixupKinds.h. // // name offset bits flags - { "fixup_Mips_NONE", 0, 0, 0 }, { "fixup_Mips_16", 0, 16, 0 }, { "fixup_Mips_32", 0, 32, 0 }, { "fixup_Mips_REL32", 0, 32, 0 }, @@ -140,7 +129,8 @@ public: { "fixup_Mips_LO16", 0, 16, 0 }, { "fixup_Mips_GPREL16", 0, 16, 0 }, { "fixup_Mips_LITERAL", 0, 16, 0 }, - { "fixup_Mips_GOT16", 0, 16, 0 }, + { "fixup_Mips_GOT_Global", 0, 16, 0 }, + { "fixup_Mips_GOT_Local", 0, 16, 0 }, { "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_Mips_CALL16", 0, 16, 0 }, { "fixup_Mips_GPREL32", 0, 32, 0 }, @@ -173,6 +163,17 @@ public: return false; } + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // FIXME. 
+ assert(0 && "RelaxInstruction() unimplemented"); + return false; + } + /// RelaxInstruction - Relax the instruction in the given fragment /// to the next wider instruction. /// diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index cebfde0..00fc5df 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -31,8 +31,9 @@ namespace MipsII { MO_NO_FLAG, - /// MO_GOT - Represents the offset into the global offset table at which + /// MO_GOT16 - Represents the offset into the global offset table at which /// the address the relocation entry symbol resides during execution. + MO_GOT16, MO_GOT, /// MO_GOT_CALL - Represents the offset into the global offset table at @@ -55,6 +56,13 @@ namespace MipsII { // Dynamic TLS). MO_TLSGD, + /// MO_TLSLDM - Represents the offset into the global offset table at which + // the module ID and TLS block offset reside during execution (Local + // Dynamic TLS). + MO_TLSLDM, + MO_DTPREL_HI, + MO_DTPREL_LO, + /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial // Exec TLS). MO_GOTTPREL, @@ -180,6 +188,7 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) case Mips::D14: return 28; case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64: + case Mips::HWR29: return 29; case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64: case Mips::D15: diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index 20890ed..a56c002 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -14,74 +14,82 @@ namespace llvm { namespace Mips { - enum Fixups { - // fixup_Mips_xxx - R_MIPS_NONE - fixup_Mips_NONE = FirstTargetFixupKind, + // Although most of the current fixup types reflect a unique relocation, + // one can have multiple fixup types for a given relocation and thus need + // to be uniquely named. + // + // This table *must* be in the same order as + // MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] + // in MipsAsmBackend.cpp. + // + enum Fixups { + // Branch fixups resulting in R_MIPS_16. + fixup_Mips_16 = FirstTargetFixupKind, - // fixup_Mips_xxx - R_MIPS_16. - fixup_Mips_16, + // Pure 32 bit data fixup resulting in - R_MIPS_32. + fixup_Mips_32, - // fixup_Mips_xxx - R_MIPS_32. - fixup_Mips_32, + // Full 32 bit data relative data fixup resulting in - R_MIPS_REL32. + fixup_Mips_REL32, - // fixup_Mips_xxx - R_MIPS_REL32. - fixup_Mips_REL32, + // Jump 26 bit fixup resulting in - R_MIPS_26. + fixup_Mips_26, - // fixup_Mips_xxx - R_MIPS_26. - fixup_Mips_26, + // Pure upper 16 bit fixup resulting in - R_MIPS_HI16. + fixup_Mips_HI16, - // fixup_Mips_xxx - R_MIPS_HI16. - fixup_Mips_HI16, + // Pure lower 16 bit fixup resulting in - R_MIPS_LO16. + fixup_Mips_LO16, - // fixup_Mips_xxx - R_MIPS_LO16. - fixup_Mips_LO16, + // 16 bit fixup for GP offset resulting in - R_MIPS_GPREL16. + fixup_Mips_GPREL16, - // fixup_Mips_xxx - R_MIPS_GPREL16. - fixup_Mips_GPREL16, + // 16 bit literal fixup resulting in - R_MIPS_LITERAL. + fixup_Mips_LITERAL, - // fixup_Mips_xxx - R_MIPS_LITERAL. - fixup_Mips_LITERAL, + // Global symbol fixup resulting in - R_MIPS_GOT16. + fixup_Mips_GOT_Global, - // fixup_Mips_xxx - R_MIPS_GOT16. - fixup_Mips_GOT16, + // Local symbol fixup resulting in - R_MIPS_GOT16. + fixup_Mips_GOT_Local, - // fixup_Mips_xxx - R_MIPS_PC16. - fixup_Mips_PC16, + // PC relative branch fixup resulting in - R_MIPS_PC16.
+ fixup_Mips_PC16, - // fixup_Mips_xxx - R_MIPS_CALL16. - fixup_Mips_CALL16, + // resulting in - R_MIPS_CALL16. + fixup_Mips_CALL16, - // fixup_Mips_xxx - R_MIPS_GPREL32. - fixup_Mips_GPREL32, + // resulting in - R_MIPS_GPREL32. + fixup_Mips_GPREL32, - // fixup_Mips_xxx - R_MIPS_SHIFT5. - fixup_Mips_SHIFT5, + // resulting in - R_MIPS_SHIFT5. + fixup_Mips_SHIFT5, - // fixup_Mips_xxx - R_MIPS_SHIFT6. - fixup_Mips_SHIFT6, + // resulting in - R_MIPS_SHIFT6. + fixup_Mips_SHIFT6, - // fixup_Mips_xxx - R_MIPS_64. - fixup_Mips_64, + // Pure 64 bit data fixup resulting in - R_MIPS_64. + fixup_Mips_64, - // fixup_Mips_xxx - R_MIPS_TLS_GD. - fixup_Mips_TLSGD, + // resulting in - R_MIPS_TLS_GD. + fixup_Mips_TLSGD, - // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL. - fixup_Mips_GOTTPREL, + // resulting in - R_MIPS_TLS_GOTTPREL. + fixup_Mips_GOTTPREL, - // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16. - fixup_Mips_TPREL_HI, + // resulting in - R_MIPS_TLS_TPREL_HI16. + fixup_Mips_TPREL_HI, - // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16. - fixup_Mips_TPREL_LO, + // resulting in - R_MIPS_TLS_TPREL_LO16. + fixup_Mips_TPREL_LO, - // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16 - fixup_Mips_Branch_PCRel, + // PC relative branch fixup resulting in - R_MIPS_PC16 + fixup_Mips_Branch_PCRel, - // Marker - LastTargetFixupKind, - NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind - }; + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; } // namespace Mips } // namespace llvm diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 0c3cbb3..463dcfe 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -194,8 +194,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, case MCSymbolRefExpr::VK_Mips_GOT_CALL: FixupKind = Mips::fixup_Mips_CALL16; break; + case MCSymbolRefExpr::VK_Mips_GOT16: + FixupKind = Mips::fixup_Mips_GOT_Global; + break; case MCSymbolRefExpr::VK_Mips_GOT: - FixupKind = Mips::fixup_Mips_GOT16; + FixupKind = Mips::fixup_Mips_GOT_Local; break; case MCSymbolRefExpr::VK_Mips_ABS_HI: FixupKind = Mips::fixup_Mips_HI16; @@ -245,8 +248,8 @@ unsigned MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { assert(MI.getOperand(OpNo).isImm()); - unsigned szEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); - return szEncoding - 1; + unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); + return SizeEncoding - 1; } // FIXME: should be called getMSBEncoding @@ -256,10 +259,10 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { assert(MI.getOperand(OpNo-1).isImm()); assert(MI.getOperand(OpNo).isImm()); - unsigned pos = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups); - unsigned sz = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); + unsigned Position = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups); + unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); - return pos + sz - 1; + return Position + Size - 1; } #include "MipsGenMCCodeEmitter.inc" diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 39c2c16..e9e0f60 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -79,9 +79,9 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion", class Proc<string Name, list<SubtargetFeature> 
Features> : Processor<Name, MipsGenericItineraries, Features>; -def : Proc<"mips32r1", [FeatureMips32]>; -def : Proc<"4ke", [FeatureMips32r2]>; -def : Proc<"mips64r1", [FeatureMips64]>; +def : Proc<"mips32", [FeatureMips32]>; +def : Proc<"mips32r2", [FeatureMips32r2]>; +def : Proc<"mips64", [FeatureMips64]>; def : Proc<"mips64r2", [FeatureMips64r2]>; def MipsAsmWriter : AsmWriter { diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index b0fb4fa..2996986 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -25,7 +25,7 @@ def uimm16_64 : Operand<i64> { // Transformation Function - get Imm - 32. def Subtract32 : SDNodeXForm<imm, [{ - return getI32Imm((unsigned)N->getZExtValue() - 32); + return getImm(N, (unsigned)N->getZExtValue() - 32); }]>; // shamt field must fit in 5 bits. @@ -36,6 +36,19 @@ def imm32_63 : ImmLeaf<i32, [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}], Subtract32>; +// Is a 32-bit int. +def immSExt32 : ImmLeaf<i64, [{return isInt<32>(Imm);}]>; + +// Transformation Function - get the higher 16 bits. +def HIGHER : SDNodeXForm<imm, [{ + return getImm(N, (N->getZExtValue() >> 32) & 0xFFFF); +}]>; + +// Transformation Function - get the highest 16 bits. +def HIGHEST : SDNodeXForm<imm, [{ + return getImm(N, (N->getZExtValue() >> 48) & 0xFFFF); +}]>; + //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// @@ -206,6 +219,17 @@ let Uses = [SP_64] in def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, Requires<[IsN64]>; +def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; + +def DEXT : ExtBase<3, "dext", CPU64Regs>; +def DINS : InsBase<7, "dins", CPU64Regs>; + +def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "dsll32\t$rd, $rt, 0", [], IIAlu>; + +def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "sll\t$rd, $rt, 0", [], IIAlu>; + //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// @@ -216,9 +240,15 @@ def : Pat<(i64 immSExt16:$in), def : Pat<(i64 immZExt16:$in), (ORi64 ZERO_64, imm:$in)>; +// 32-bit immediates +def : Pat<(i64 immSExt32:$imm), + (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>; + // Arbitrary immediates def : Pat<(i64 imm:$imm), - (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>; + (ORi64 (DSLL (ORi64 (DSLL (ORi64 (LUi64 (HIGHEST imm:$imm)), + (HIGHER imm:$imm)), 16), (HI16 imm:$imm)), 16), + (LO16 imm:$imm))>; // extended loads let Predicates = [NotN64] in { @@ -236,11 +266,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>; def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>; def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>; def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>; +def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>; def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>; def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>; def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>; def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>; +def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>; def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)), (DADDiu 
CPU64Regs:$hi, tglobaladdr:$lo)>; @@ -250,6 +282,15 @@ def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)), (DADDiu CPU64Regs:$hi, tjumptable:$lo)>; def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)), (DADDiu CPU64Regs:$hi, tconstpool:$lo)>; +def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)), + (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>; + +def : WrapperPat<tglobaladdr, DADDiu, GP_64>; +def : WrapperPat<tconstpool, DADDiu, GP_64>; +def : WrapperPat<texternalsym, DADDiu, GP_64>; +def : WrapperPat<tblockaddress, DADDiu, GP_64>; +def : WrapperPat<tjumptable, DADDiu, GP_64>; +def : WrapperPat<tglobaltlsaddr, DADDiu, GP_64>; defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64, ZERO_64>; @@ -268,3 +309,6 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>; def : Pat<(i32 (trunc CPU64Regs:$src)), (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>; +// 32-to-64-bit extension +def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>; +def : Pat<(i64 (zext CPURegs:$src)), (DSRL32 (DSLL64_32 CPURegs:$src), 0)>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index d27e3ab..a5505d3 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -96,19 +96,17 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (!OutStreamer.hasRawTextSupport()) { // Lower CPLOAD and CPRESTORE - if (Opc == Mips::CPLOAD) { + if (Opc == Mips::CPLOAD) MCInstLowering.LowerCPLOAD(MI, MCInsts); - for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I - != MCInsts.end(); ++I) + else if (Opc == Mips::CPRESTORE) + MCInstLowering.LowerCPRESTORE(MI, MCInsts); + + if (!MCInsts.empty()) { + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); + I != MCInsts.end(); ++I) OutStreamer.EmitInstruction(*I); return; } - - if (Opc == Mips::CPRESTORE) { - MCInstLowering.LowerCPRESTORE(MI, TmpInst0); - OutStreamer.EmitInstruction(TmpInst0); - return; - } } OutStreamer.EmitInstruction(TmpInst0); @@ -317,9 +315,9 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* // Otherwise, check the last instruction. // Check if the last terminator is an unconditional branch. MachineBasicBlock::const_iterator I = Pred->end(); - while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ; + while (I != Pred->begin() && !(--I)->isTerminator()) ; - return !I->getDesc().isBarrier(); + return !I->isBarrier(); } // Print out an operand for an inline asm expression. 
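For reference, a hedged C++ sketch (not part of the patch; the chunk helper is invented for illustration) of what the new "arbitrary immediates" pattern in Mips64InstrInfo.td above expands to: the HIGHEST, HIGHER, HI16 and LO16 transforms each select one 16-bit slice of the 64-bit immediate, and the lui/ori/dsll chain reassembles them.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Mirror of the TableGen transforms: pick out one 16-bit chunk of the immediate.
static uint64_t chunk(uint64_t Imm, unsigned Shift) {
  return (Imm >> Shift) & 0xffff;
}

int main() {
  const uint64_t Imm = 0x123456789abcdef0ULL;

  // lui  $r, HIGHEST(imm)      ; bits 63-48
  // ori  $r, $r, HIGHER(imm)   ; bits 47-32
  // dsll $r, $r, 16
  // ori  $r, $r, HI16(imm)     ; bits 31-16
  // dsll $r, $r, 16
  // ori  $r, $r, LO16(imm)     ; bits 15-0
  uint64_t R = chunk(Imm, 48) << 16;   // lui (its sign extension is shifted
                                       // out by the two dsll steps below)
  R = (R | chunk(Imm, 32)) << 16;
  R = (R | chunk(Imm, 16)) << 16;
  R |= chunk(Imm, 0);

  assert(R == Imm);
  std::printf("materialized 0x%llx\n", (unsigned long long)R);
  return 0;
}

Any sign bits that lui smears into the upper word are irrelevant here, because the two dsll-by-16 shifts push them out of the 64-bit register before the final ori.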
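Similarly, a minimal standalone sketch (again outside the patch; the splitHiLo helper is hypothetical) of the carry adjustment that the reworked adjustFixupValue applies for fixup_Mips_HI16 and fixup_Mips_GOT_Local above, and that the .cprestore lowering further down reuses for offsets of 0x8000 or more: because the low half is later consumed as a sign-extended 16-bit immediate, the high half has to absorb a carry whenever bit 15 of the value is set.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Split a 32-bit value into the %hi/%lo halves MIPS expects.
static void splitHiLo(uint32_t Value, uint32_t &Hi, uint32_t &Lo) {
  Lo = Value & 0xffff;
  Hi = (Value >> 16) + ((Value & 0x8000) != 0);  // add 1 if bit 15 is 1
}

int main() {
  uint32_t Hi, Lo;
  splitHiLo(0x12348abcu, Hi, Lo);
  // Reassemble the way the hardware does: lui $at, Hi ; addiu $at, $at, Lo,
  // where Lo is sign-extended to 32 bits before the add.
  uint32_t Rebuilt =
      (Hi << 16) + static_cast<uint32_t>(static_cast<int16_t>(Lo));
  assert(Rebuilt == 0x12348abcu);
  std::printf("hi=0x%x lo=0x%x\n", (unsigned)Hi, (unsigned)Lo);
  return 0;
}

The same rule is why LowerCPRESTORE below computes Hi as (Offset >> 16) + ((Offset & 0x8000) != 0) before emitting the lui/addu/sw sequence.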
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index a8f29ae..6b26e24 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -144,7 +144,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB){ MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) emitInstruction(*I); } @@ -161,7 +161,7 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, if (Form == MipsII::FrmJ) return Mips::reloc_mips_26; if ((Form == MipsII::FrmI || Form == MipsII::FrmFI) - && MI.getDesc().isBranch()) + && MI.isBranch()) return Mips::reloc_mips_branch; if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi) return Mips::reloc_mips_hi; diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index be3b7a0..1d9e9b0 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -96,7 +96,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { LastFiller = MBB.end(); for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->getDesc().hasDelaySlot()) { + if (I->hasDelaySlot()) { ++FilledSlots; Changed = true; @@ -146,7 +146,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, || I->isInlineAsm() || I->isLabel() || FI == LastFiller - || I->getDesc().isPseudo() + || I->isPseudo() // // Should not allow: // ERET, DERET or WAIT, PAUSE. Need to add these to instruction @@ -174,16 +174,15 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, if (candidate->isImplicitDef() || candidate->isKill()) return true; - MCInstrDesc MCID = candidate->getDesc(); // Loads or stores cannot be moved past a store to the delay slot // and stores cannot be moved past a load. - if (MCID.mayLoad()) { + if (candidate->mayLoad()) { if (sawStore) return true; sawLoad = true; } - if (MCID.mayStore()) { + if (candidate->mayStore()) { if (sawStore) return true; sawStore = true; @@ -191,7 +190,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, return true; } - assert((!MCID.isCall() && !MCID.isReturn()) && + assert((!candidate->isCall() && !candidate->isReturn()) && "Cannot put calls or returns in delay slot."); for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) { @@ -221,11 +220,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, SmallSet<unsigned, 32>& RegUses) { // If MI is a call or return, just examine the explicit non-variadic operands. MCInstrDesc MCID = MI->getDesc(); - unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() : - MI->getNumOperands(); + unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() : + MI->getNumOperands(); // Add RA to RegDefs to prevent users of RA from going into delay slot. - if (MCID.isCall()) + if (MI->isCall()) RegDefs.insert(Mips::RA); for (unsigned i = 0; i != e; ++i) { diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 36aef99..2466545 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -85,8 +85,8 @@ using namespace llvm; // if frame pointer elimination is disabled. 
bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() - || MFI->isFrameAddressTaken(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); } bool MipsFrameLowering::targetHandlesStackFrameRounding() const { diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 9c831ed..b17239d 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -86,10 +86,9 @@ private: // Complex Pattern. bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset); - // getI32Imm - Return a target constant with the specified - // value, of type i32. - inline SDValue getI32Imm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); + // getImm - Return a target constant with the specified value. + inline SDValue getImm(const SDNode *Node, unsigned Imm) { + return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); } virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -122,21 +121,16 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { } // on PIC code Load GA - if (TM.getRelocationModel() == Reloc::PIC_) { - if (Addr.getOpcode() == MipsISD::WrapperPIC) { - Base = CurDAG->getRegister(GPReg, ValTy); - Offset = Addr.getOperand(0); - return true; - } - } else { + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = CurDAG->getRegister(GPReg, ValTy); + Offset = Addr.getOperand(0); + return true; + } + + if (TM.getRelocationModel() != Reloc::PIC_) { if ((Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; - else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) { - Base = CurDAG->getRegister(GPReg, ValTy); - Offset = Addr; - return true; - } } // Addresses of the form FI+const or FI|const @@ -310,13 +304,24 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { } case MipsISD::ThreadPointer: { - unsigned SrcReg = Mips::HWR29; - unsigned DestReg = Mips::V1; - SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(), - Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32)); + EVT PtrVT = TLI.getPointerTy(); + unsigned RdhwrOpc, SrcReg, DestReg; + + if (PtrVT == MVT::i32) { + RdhwrOpc = Mips::RDHWR; + SrcReg = Mips::HWR29; + DestReg = Mips::V1; + } else { + RdhwrOpc = Mips::RDHWR64; + SrcReg = Mips::HWR29_64; + DestReg = Mips::V1_64; + } + + SDNode *Rdhwr = CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), + Node->getValueType(0), CurDAG->getRegister(SrcReg, PtrVT)); SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg, SDValue(Rdhwr, 0)); - SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32); + SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT); ReplaceUses(SDValue(Node, 0), ResNode); return ResNode.getNode(); } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b5a15cf..c9b657c 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -40,11 +40,11 @@ using namespace llvm; // mask (Pos), and return true. // For example, if I is 0x003ff800, (Pos, Size) = (11, 11). 
static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { - if (!isUInt<32>(I) || !isShiftedMask_32(I)) + if (!isShiftedMask_64(I)) return false; - Size = CountPopulation_32(I); - Pos = CountTrailingZeros_32(I); + Size = CountPopulation_64(I); + Pos = CountTrailingZeros_64(I); return true; } @@ -54,9 +54,6 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::Hi: return "MipsISD::Hi"; case MipsISD::Lo: return "MipsISD::Lo"; case MipsISD::GPRel: return "MipsISD::GPRel"; - case MipsISD::TlsGd: return "MipsISD::TlsGd"; - case MipsISD::TprelHi: return "MipsISD::TprelHi"; - case MipsISD::TprelLo: return "MipsISD::TprelLo"; case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer"; case MipsISD::Ret: return "MipsISD::Ret"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; @@ -72,7 +69,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::DivRemU: return "MipsISD::DivRemU"; case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; - case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC"; + case MipsISD::Wrapper: return "MipsISD::Wrapper"; case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; case MipsISD::Sync: return "MipsISD::Sync"; case MipsISD::Ext: return "MipsISD::Ext"; @@ -129,7 +126,9 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); @@ -157,6 +156,10 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); @@ -555,20 +558,20 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, return SDValue(); SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1); - + unsigned ShiftRightOpc = ShiftRight.getOpcode(); + // Op's first operand must be a shift right. - if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL) + if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL) return SDValue(); // The second operand of the shift must be an immediate. - uint64_t Pos; ConstantSDNode *CN; if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1)))) return SDValue(); - Pos = CN->getZExtValue(); - + uint64_t Pos = CN->getZExtValue(); uint64_t SMPos, SMSize; + // Op's second operand must be a shifted mask. if (!(CN = dyn_cast<ConstantSDNode>(Mask)) || !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize)) @@ -576,10 +579,11 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, // Return if the shifted mask does not start at bit 0 or the sum of its size // and Pos exceeds the word's size. 
- if (SMPos != 0 || Pos + SMSize > 32) + EVT ValTy = N->getValueType(0); + if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits()) return SDValue(); - return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32, + return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), ValTy, ShiftRight.getOperand(0), DAG.getConstant(Pos, MVT::i32), DAG.getConstant(SMSize, MVT::i32)); @@ -630,10 +634,11 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, // Return if the shift amount and the first bit position of mask are not the // same. - if (Shamt != SMPos0) + EVT ValTy = N->getValueType(0); + if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) return SDValue(); - return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32, + return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy, Shl.getOperand(0), DAG.getConstant(SMPos0, MVT::i32), DAG.getConstant(SMSize0, MVT::i32), @@ -1485,9 +1490,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, (GV->hasLocalLinkage() && !isa<Function>(GV))); unsigned GotFlag = IsN64 ? (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) : - MipsII::MO_GOT; + (HasGotOfst ? MipsII::MO_GOT : MipsII::MO_GOT16); SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag); - GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA); + GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GA); SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA, MachinePointerInfo(), false, false, false, 0); @@ -1523,7 +1528,7 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag); - BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, BAGOTOffset); + BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, BAGOTOffset); SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag); SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), BAGOTOffset, @@ -1535,9 +1540,9 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, SDValue MipsTargetLowering:: LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - // If the relocation model is PIC, use the General Dynamic TLS Model, - // otherwise use the Initial Exec or Local Exec TLS Model. - // TODO: implement Local Dynamic TLS model + // If the relocation model is PIC, use the General Dynamic TLS Model or + // Local Dynamic TLS model, otherwise use the Initial Exec or + // Local Exec TLS Model. GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); DebugLoc dl = GA->getDebugLoc(); @@ -1546,45 +1551,59 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { // General Dynamic TLS Model - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, - 0, MipsII::MO_TLSGD); - SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA); - SDValue GP = DAG.getRegister(Mips::GP, MVT::i32); - SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd); + bool LocalDynamic = GV->hasInternalLinkage(); + unsigned Flag = LocalDynamic ? 
MipsII::MO_TLSLDM :MipsII::MO_TLSGD; + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag); + SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA); + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + + SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; - Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); + Entry.Ty = PtrTy; Args.push_back(Entry); + std::pair<SDValue, SDValue> CallResult = - LowerCallTo(DAG.getEntryNode(), - (Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, - dl); - - return CallResult.first; + LowerCallTo(DAG.getEntryNode(), PtrTy, + false, false, false, false, 0, CallingConv::C, false, true, + TlsGetAddr, Args, DAG, dl); + + SDValue Ret = CallResult.first; + + if (!LocalDynamic) + return Ret; + + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + MipsII::MO_DTPREL_HI); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + MipsII::MO_DTPREL_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo); + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret); + return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo); } SDValue Offset; if (GV->isDeclaration()) { // Initial Exec TLS Model - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, MipsII::MO_GOTTPREL); - Offset = DAG.getLoad(MVT::i32, dl, + TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA); + Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), TGA, MachinePointerInfo(), false, false, false, 0); } else { // Local Exec TLS Model - SDVTList VTs = DAG.getVTList(MVT::i32); - SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, MipsII::MO_TPREL_HI); - SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, MipsII::MO_TPREL_LO); - SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1); - SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo); - Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo); + Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); } SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT); @@ -1594,34 +1613,29 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const SDValue MipsTargetLowering:: LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { - SDValue ResNode; - SDValue HiPart; + SDValue HiPart, JTI, JTILo; // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - unsigned char OpFlag = IsPIC ? 
MipsII::MO_GOT : MipsII::MO_ABS_HI; - EVT PtrVT = Op.getValueType(); - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); - - if (!IsPIC) { - SDValue Ops[] = { JTI }; - HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); + if (!IsPIC && !IsN64) { + JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_HI); + HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI); + JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO); } else {// Emit Load from Global Pointer - JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI); - HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, - MachinePointerInfo(), - false, false, false, 0); + unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OfstFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag); + JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, JTI); + HiPart = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), JTI, + MachinePointerInfo(), false, false, false, 0); + JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OfstFlag); } - SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - MipsII::MO_ABS_LO); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo); - ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); - - return ResNode; + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, JTILo); + return DAG.getNode(ISD::ADD, dl, PtrVT, HiPart, Lo); } SDValue MipsTargetLowering:: @@ -1657,7 +1671,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), N->getOffset(), GOTFlag); - CP = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, CP); + CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, CP); SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), false, false, false, 0); @@ -1685,21 +1699,29 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV), false, false, 0); } - -static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) { + +// Called if the size of integer registers is large enough to hold the whole +// floating point number. +static SDValue LowerFCOPYSIGNLargeIntReg(SDValue Op, SelectionDAG &DAG) { // FIXME: Use ext/ins instructions if target architecture is Mips32r2. 
+ EVT ValTy = Op.getValueType(); + EVT IntValTy = MVT::getIntegerVT(ValTy.getSizeInBits()); + uint64_t Mask = (uint64_t)1 << (ValTy.getSizeInBits() - 1); DebugLoc dl = Op.getDebugLoc(); - SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1)); - SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0, - DAG.getConstant(0x7fffffff, MVT::i32)); - SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1, - DAG.getConstant(0x80000000, MVT::i32)); - SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result); + SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(1)); + SDValue And0 = DAG.getNode(ISD::AND, dl, IntValTy, Op0, + DAG.getConstant(Mask - 1, IntValTy)); + SDValue And1 = DAG.getNode(ISD::AND, dl, IntValTy, Op1, + DAG.getConstant(Mask, IntValTy)); + SDValue Result = DAG.getNode(ISD::OR, dl, IntValTy, And0, And1); + return DAG.getNode(ISD::BITCAST, dl, ValTy, Result); } -static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) { +// Called if the size of integer registers is not large enough to hold the whole +// floating point number (e.g. f64 & 32-bit integer register). +static SDValue +LowerFCOPYSIGNSmallIntReg(SDValue Op, SelectionDAG &DAG, bool isLittle) { // FIXME: // Use ext/ins instructions if target architecture is Mips32r2. // Eliminate redundant mfc1 and mtc1 instructions. @@ -1734,10 +1756,10 @@ SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) assert(Ty == MVT::f32 || Ty == MVT::f64); - if (Ty == MVT::f32) - return LowerFCOPYSIGN32(Op, DAG); + if (Ty == MVT::f32 || HasMips64) + return LowerFCOPYSIGNLargeIntReg(Op, DAG); else - return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle()); + return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle()); } SDValue MipsTargetLowering:: @@ -2328,7 +2350,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // node so that legalize doesn't hack it. 
unsigned char OpFlag; bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25 - bool LoadSymAddr = false; + bool GlobalOrExternal = false; SDValue CalleeLo; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { @@ -2345,7 +2367,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, getPointerTy(), 0, OpFlag); } - LoadSymAddr = true; + GlobalOrExternal = true; } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { if (IsN64 || (!IsO32 && IsPIC)) @@ -2356,16 +2378,16 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, OpFlag = MipsII::MO_GOT_CALL; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlag); - LoadSymAddr = true; + GlobalOrExternal = true; } SDValue InFlag; // Create nodes that load address of callee and copy it to T9 if (IsPICCall) { - if (LoadSymAddr) { + if (GlobalOrExternal) { // Load callee address - Callee = DAG.getNode(MipsISD::WrapperPIC, dl, getPointerTy(), Callee); + Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(), Callee); SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, false, 0); @@ -2377,7 +2399,11 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, } else Callee = LoadValue; } + } + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { // copy to T9 unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index f2b64e3..81d093f 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -40,13 +40,6 @@ namespace llvm { // Handle gp_rel (small data/bss sections) relocation. GPRel, - // General Dynamic TLS - TlsGd, - - // Local Exec TLS - TprelHi, - TprelLo, - // Thread Pointer ThreadPointer, @@ -79,7 +72,7 @@ namespace llvm { BuildPairF64, ExtractElementF64, - WrapperPIC, + Wrapper, DynAlloc, diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index e1725fa..21a1862 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -115,7 +115,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, let Inst{15-0} = imm16; } -class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr, +class BranchBase<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmI> { diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 5358dc0..ea101f7 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -29,8 +29,8 @@ using namespace llvm; MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()), - RI(*TM.getSubtargetImpl(), *this) {} - + RI(*TM.getSubtargetImpl(), *this), + UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? 
Mips::B : Mips::J) {} const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { return RI; @@ -236,7 +236,8 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) { Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || - Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || + Opc == Mips::J) ? Opc : 0; } @@ -320,7 +321,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If there is only one terminator instruction, process it. if (!SecondLastOpc) { // Unconditional branch - if (LastOpc == Mips::J) { + if (LastOpc == UncondBrOpc) { TBB = LastInst->getOperand(0).getMBB(); return false; } @@ -337,7 +338,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If second to last instruction is an unconditional branch, // analyze it and remove the last instruction. - if (SecondLastOpc == Mips::J) { + if (SecondLastOpc == UncondBrOpc) { // Return if the last instruction cannot be removed. if (!AllowModify) return true; @@ -349,7 +350,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Conditional branch followed by an unconditional branch. // The last one must be unconditional. - if (LastOpc != Mips::J) + if (LastOpc != UncondBrOpc) return true; AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); @@ -391,14 +392,14 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // Two-way Conditional branch. if (FBB) { BuildCondBr(MBB, TBB, DL, Cond); - BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB); + BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB); return 2; } // One way branch. // Unconditional branch. if (Cond.empty()) - BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB); + BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB); else // Conditional branch. BuildCondBr(MBB, TBB, DL, Cond); return 1; diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 8fa3052..70cc2cf 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -34,6 +34,7 @@ class MipsInstrInfo : public MipsGenInstrInfo { MipsTargetMachine &TM; bool IsN64; const MipsRegisterInfo RI; + unsigned UncondBrOpc; public: explicit MipsInstrInfo(MipsTargetMachine &TM); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 0ae94ab..9fcc5fd 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -107,7 +107,7 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem, // movn %got(d)($gp), %got(c)($gp), $4 // This instruction is illegal since movn can take only register operands. -def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>; +def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntUnaryOp>; // Pointer to dynamically allocated stack area. def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc, @@ -132,6 +132,8 @@ def NotMips64 : Predicate<"!Subtarget.hasMips64()">; def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">; def IsN64 : Predicate<"Subtarget.isABI_N64()">; def NotN64 : Predicate<"!Subtarget.isABI_N64()">; +def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; +def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">; //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. 
@@ -194,12 +196,12 @@ def size_ins : Operand<i32> { // Transformation Function - get the lower 16 bits. def LO16 : SDNodeXForm<imm, [{ - return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF); + return getImm(N, N->getZExtValue() & 0xFFFF); }]>; // Transformation Function - get the higher 16 bits. def HI16 : SDNodeXForm<imm, [{ - return getI32Imm((unsigned)N->getZExtValue() >> 16); + return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF); }]>; // Node immediate fits as 16-bit sign extended on target immediate. @@ -380,21 +382,13 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC, let isPseudo = Pseudo; } -// Memory Load/Store +// Unaligned Memory Load/Store let canFoldAsLoad = 1 in -class LoadX<bits<6> op, RegisterClass RC, - Operand MemOpnd>: - FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), - "", - [], IILoad> { -} +class LoadUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>: + FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), "", [], IILoad> {} -class StoreX<bits<6> op, RegisterClass RC, - Operand MemOpnd>: - FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), - "", - [], IIStore> { -} +class StoreUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>: + FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), "", [], IIStore> {} // 32-bit load. multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode, @@ -415,10 +409,10 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, } // 32-bit load. -multiclass LoadX32<bits<6> op> { - def #NAME# : LoadX<op, CPURegs, mem>, +multiclass LoadUnAlign32<bits<6> op> { + def #NAME# : LoadUnAlign<op, CPURegs, mem>, Requires<[NotN64]>; - def _P8 : LoadX<op, CPURegs, mem64>, + def _P8 : LoadUnAlign<op, CPURegs, mem64>, Requires<[IsN64]>; } // 32-bit store. @@ -440,18 +434,18 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, } // 32-bit store. 
-multiclass StoreX32<bits<6> op> { - def #NAME# : StoreX<op, CPURegs, mem>, +multiclass StoreUnAlign32<bits<6> op> { + def #NAME# : StoreUnAlign<op, CPURegs, mem>, Requires<[NotN64]>; - def _P8 : StoreX<op, CPURegs, mem64>, + def _P8 : StoreUnAlign<op, CPURegs, mem64>, Requires<[IsN64]>; } // Conditional Branch class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: - CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16), - !strconcat(instr_asm, "\t$rs, $rt, $imm16"), - [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> { + BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16), + !strconcat(instr_asm, "\t$rs, $rt, $imm16"), + [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; @@ -459,9 +453,9 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, RegisterClass RC>: - CBranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16), - !strconcat(instr_asm, "\t$rs, $imm16"), - [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> { + BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16), + !strconcat(instr_asm, "\t$rs, $imm16"), + [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> { let rt = _rt; let isBranch = 1; let isTerminator = 1; @@ -485,11 +479,29 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od, [(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))], IIAlu>; -// Unconditional branch -let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in +// Jump class JumpFJ<bits<6> op, string instr_asm>: FJ<op, (outs), (ins jmptarget:$target), - !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>; + !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch> { + let isBranch=1; + let isTerminator=1; + let isBarrier=1; + let hasDelaySlot = 1; + let Predicates = [RelocStatic]; +} + +// Unconditional branch +class UncondBranch<bits<6> op, string instr_asm>: + BranchBase<op, (outs), (ins brtarget:$imm16), + !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> { + let rs = 0; + let rt = 0; + let isBranch = 1; + let isTerminator = 1; + let isBarrier = 1; + let hasDelaySlot = 1; + let Predicates = [RelocPIC]; +} let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1, isIndirectBranch = 1 in @@ -616,21 +628,37 @@ class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>: } // Read Hardware -class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd), - "rdhwr\t$rt, $rd", [], IIAlu> { +class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass> + : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd), + "rdhwr\t$rt, $rd", [], IIAlu> { let rs = 0; let shamt = 0; } // Ext and Ins -class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins, - list<dag> pattern, InstrItinClass itin>: - FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), - pattern, itin>, Requires<[HasMips32r2]> { +class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>: + FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz), + !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), + [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> { bits<5> pos; bits<5> sz; let rd = sz; let shamt = pos; + let Predicates = [HasMips32r2]; +} + +class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>: + FR<0x1f, _funct, (outs RC:$rt), + (ins RC:$rs, 
uimm16:$pos, size_ins:$sz, RC:$src), + !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), + [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))], + NoItinerary> { + bits<5> pos; + bits<5> sz; + let rd = sz; + let shamt = pos; + let Predicates = [HasMips32r2]; + let Constraints = "$src = $rt"; } // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). @@ -795,10 +823,10 @@ defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>; defm USW : StoreM32<0x2b, "usw", store_u, 1>; /// Primitives for unaligned -defm LWL : LoadX32<0x22>; -defm LWR : LoadX32<0x26>; -defm SWL : StoreX32<0x2A>; -defm SWR : StoreX32<0x2E>; +defm LWL : LoadUnAlign32<0x22>; +defm LWR : LoadUnAlign32<0x26>; +defm SWL : StoreUnAlign32<0x2A>; +defm SWR : StoreUnAlign32<0x2E>; let hasSideEffects = 1 in def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", @@ -822,6 +850,7 @@ def J : JumpFJ<0x02, "j">; def JR : JumpFR<0x00, 0x08, "jr", CPURegs>; def JAL : JumpLink<0x03, "jal">; def JALR : JumpLinkReg<0x00, 0x09, "jalr">; +def B : UncondBranch<0x04, "b">; def BEQ : CBranch<0x04, "beq", seteq, CPURegs>; def BNE : CBranch<0x05, "bne", setne, CPURegs>; def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>; @@ -888,21 +917,10 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>; def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>, Requires<[HasMips32]>; -def RDHWR : ReadHardware; - -def EXT : ExtIns<0, "ext", (outs CPURegs:$rt), - (ins CPURegs:$rs, uimm16:$pos, size_ext:$sz), - [(set CPURegs:$rt, - (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))], - NoItinerary>; +def RDHWR : ReadHardware<CPURegs, HWRegs>; -let Constraints = "$src = $rt" in -def INS : ExtIns<4, "ins", (outs CPURegs:$rt), - (ins CPURegs:$rs, uimm16:$pos, size_ins:$sz, CPURegs:$src), - [(set CPURegs:$rt, - (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz, - CPURegs:$src))], - NoItinerary>; +def EXT : ExtBase<0, "ext", CPURegs>; +def INS : InsBase<4, "ins", CPURegs>; //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions @@ -939,11 +957,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; +def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; +def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; @@ -953,6 +973,8 @@ def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), (ADDiu CPURegs:$hi, tjumptable:$lo)>; def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), (ADDiu CPURegs:$hi, tconstpool:$lo)>; +def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)), + (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; // gp_rel relocs def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), @@ -960,26 +982,17 @@ def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), (ADDiu CPURegs:$gp, tconstpool:$in)>; -// tlsgd -def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)), - (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>; - 
-// tprel hi/lo -def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; -def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; -def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)), - (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; - // wrapper_pic -class WrapperPICPat<SDNode node>: - Pat<(MipsWrapperPIC node:$in), - (ADDiu GP, node:$in)>; - -def : WrapperPICPat<tglobaladdr>; -def : WrapperPICPat<tconstpool>; -def : WrapperPICPat<texternalsym>; -def : WrapperPICPat<tblockaddress>; -def : WrapperPICPat<tjumptable>; +class WrapperPat<SDNode node, Instruction ADDiuOp, Register GPReg>: + Pat<(MipsWrapper node:$in), + (ADDiuOp GPReg, node:$in)>; + +def : WrapperPat<tglobaladdr, ADDiu, GP>; +def : WrapperPat<tconstpool, ADDiu, GP>; +def : WrapperPat<texternalsym, ADDiu, GP>; +def : WrapperPat<tblockaddress, ADDiu, GP>; +def : WrapperPat<tjumptable, ADDiu, GP>; +def : WrapperPat<tglobaltlsaddr, ADDiu, GP>; // Mips does not have "not", so we expand our way def : Pat<(not CPURegs:$in), diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 6fc2af1..23486d3 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -41,10 +41,14 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break; case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break; case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break; + case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break; case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break; case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break; case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break; case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break; + case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break; + case MipsII::MO_DTPREL_HI:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break; + case MipsII::MO_DTPREL_LO:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break; case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break; case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break; case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break; @@ -136,14 +140,35 @@ void MipsMCInstLower::LowerCPLOAD(const MachineInstr *MI, } // Lower ".cprestore offset" to "sw $gp, offset($sp)". 
-void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI) { - OutMI.clear(); - OutMI.setOpcode(Mips::SW); - OutMI.addOperand(MCOperand::CreateReg(Mips::GP)); - OutMI.addOperand(MCOperand::CreateReg(Mips::SP)); +void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, + SmallVector<MCInst, 4>& MCInsts) { const MachineOperand &MO = MI->getOperand(0); assert(MO.isImm() && "CPRESTORE's operand must be an immediate."); - OutMI.addOperand(MCOperand::CreateImm(MO.getImm())); + unsigned Offset = MO.getImm(), Reg = Mips::SP; + MCInst Sw; + + if (Offset >= 0x8000) { + unsigned Hi = (Offset >> 16) + ((Offset & 0x8000) != 0); + Offset &= 0xffff; + Reg = Mips::AT; + + // lui at,hi + // addu at,at,sp + MCInsts.resize(2); + MCInsts[0].setOpcode(Mips::LUi); + MCInsts[0].addOperand(MCOperand::CreateReg(Mips::AT)); + MCInsts[0].addOperand(MCOperand::CreateImm(Hi)); + MCInsts[1].setOpcode(Mips::ADDu); + MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT)); + MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT)); + MCInsts[1].addOperand(MCOperand::CreateReg(Mips::SP)); + } + + Sw.setOpcode(Mips::SW); + Sw.addOperand(MCOperand::CreateReg(Mips::GP)); + Sw.addOperand(MCOperand::CreateReg(Reg)); + Sw.addOperand(MCOperand::CreateImm(Offset)); + MCInsts.push_back(Sw); } MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO, diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 98e37e4..1490c14 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -36,7 +36,7 @@ public: MipsAsmPrinter &asmprinter); void Lower(const MachineInstr *MI, MCInst &OutMI) const; void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); - void LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI); + void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerUnalignedLoadStore(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); private: diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 06c4a66..e5a0f08 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -125,6 +125,7 @@ getRegisterNumbering(unsigned RegEnum) case Mips::D14: return 28; case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64: + case Mips::HWR29: return 29; case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64: case Mips::D15: diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 925ad9e..76ee2e6 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -239,6 +239,7 @@ let Namespace = "Mips" in { // Hardware register $29 def HWR29 : Register<"29">; + def HWR29_64 : Register<"29">; } //===----------------------------------------------------------------------===// @@ -301,3 +302,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> { // Hardware registers def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; +def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; + diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 016d449..dc299f2 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -31,7 +31,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, { std::string CPUName = CPU; if (CPUName.empty()) - CPUName = "mips32r1"; + CPUName = "mips32"; // Parse features string. 
ParseSubtargetFeatures(CPUName, FS); diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 5d6b24f..02887fa 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -34,51 +34,51 @@ extern "C" void LLVMInitializeMipsTarget() { // Using CodeModel::Large enables different CALL behavior. MipsTargetMachine:: MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, - bool isLittle): - LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle), - DataLayout(isLittle ? - (Subtarget.isABI_N64() ? - "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : - "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : - (Subtarget.isABI_N64() ? - "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : - "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), - InstrInfo(*this), - FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), JITInfo() { + bool isLittle) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, isLittle), + DataLayout(isLittle ? + (Subtarget.isABI_N64() ? + "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : + "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : + (Subtarget.isABI_N64() ? + "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : + "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), + InstrInfo(*this), + FrameLowering(Subtarget), + TLInfo(*this), TSInfo(*this), JITInfo() { } MipsebTargetMachine:: MipsebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {} + CodeGenOpt::Level OL) + : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} MipselTargetMachine:: MipselTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {} + CodeGenOpt::Level OL) + : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} Mips64ebTargetMachine:: Mips64ebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {} + CodeGenOpt::Level OL) + : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} Mips64elTargetMachine:: Mips64elTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {} + CodeGenOpt::Level OL) + : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} // Install an instruction selector pass using // the ISelDag to gen Mips code. 
@@ -120,4 +120,3 @@ bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM, PM.add(createMipsJITCodeEmitterPass(*this, JCE)); return false; } - diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index e40d9e2..6842373 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -38,7 +38,7 @@ namespace llvm { public: MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); @@ -82,7 +82,7 @@ namespace llvm { class MipsebTargetMachine : public MipsTargetMachine { public: MipsebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -92,7 +92,7 @@ public: class MipselTargetMachine : public MipsTargetMachine { public: MipselTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -103,6 +103,7 @@ class Mips64ebTargetMachine : public MipsTargetMachine { public: Mips64ebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -113,6 +114,7 @@ class Mips64elTargetMachine : public MipsTargetMachine { public: Mips64elTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt index 5692604..4172d00 100644 --- a/lib/Target/Mips/TargetInfo/CMakeLists.txt +++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMMipsInfo MipsTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMMipsInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMMipsInfo MipsCommonTableGen) diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt index 90ae260..2d42568 100644 --- a/lib/Target/Mips/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MipsInfo parent = Mips required_libraries = MC Support Target add_to_library_groups = Mips - diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt index 6709c1b..a9f4330 100644 --- a/lib/Target/PTX/CMakeLists.txt +++ b/lib/Target/PTX/CMakeLists.txt @@ -25,20 +25,6 @@ add_llvm_target(PTXCodeGen PTXTargetMachine.cpp ) -add_llvm_library_dependencies(LLVMPTXCodeGen - LLVMAnalysis - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMPTXDesc - LLVMPTXInfo - LLVMSelectionDAG - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) - add_subdirectory(TargetInfo) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt index 029d060..b252893 100644 --- a/lib/Target/PTX/InstPrinter/CMakeLists.txt +++ b/lib/Target/PTX/InstPrinter/CMakeLists.txt @@ -6,8 +6,3 @@ add_llvm_library(LLVMPTXAsmPrinter add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen) -add_llvm_library_dependencies(LLVMPTXAsmPrinter - LLVMMC - LLVMSupport - ) - diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt 
b/lib/Target/PTX/InstPrinter/LLVMBuild.txt index be89c10..af5d200 100644 --- a/lib/Target/PTX/InstPrinter/LLVMBuild.txt +++ b/lib/Target/PTX/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PTXAsmPrinter parent = PTX required_libraries = MC Support add_to_library_groups = PTX - diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp index 2f6c92d..5fecb85 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -38,7 +38,50 @@ StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const { } void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); + // Decode the register number into type and offset + unsigned RegSpace = RegNo & 0x7; + unsigned RegType = (RegNo >> 3) & 0x7; + unsigned RegOffset = RegNo >> 6; + + // Print the register + OS << "%"; + + switch (RegSpace) { + default: + llvm_unreachable("Unknown register space!"); + case PTXRegisterSpace::Reg: + switch (RegType) { + default: + llvm_unreachable("Unknown register type!"); + case PTXRegisterType::Pred: + OS << "p"; + break; + case PTXRegisterType::B16: + OS << "rh"; + break; + case PTXRegisterType::B32: + OS << "r"; + break; + case PTXRegisterType::B64: + OS << "rd"; + break; + case PTXRegisterType::F32: + OS << "f"; + break; + case PTXRegisterType::F64: + OS << "fd"; + break; + } + break; + case PTXRegisterSpace::Return: + OS << "ret"; + break; + case PTXRegisterSpace::Argument: + OS << "arg"; + break; + } + + OS << RegOffset; } void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O, @@ -139,6 +182,8 @@ void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { O << "0000000000000000"; } + } else if (Op.isReg()) { + printRegName(O, Op.getReg()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); const MCExpr *Expr = Op.getExpr(); diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt index 22c70de..15a1eb5 100644 --- a/lib/Target/PTX/LLVMBuild.txt +++ b/lib/Target/PTX/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = PTX @@ -27,4 +30,3 @@ name = PTXCodeGen parent = PTX required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils add_to_library_groups = PTX - diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt index 94dbcee..d1fd74c 100644 --- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt @@ -3,11 +3,4 @@ add_llvm_library(LLVMPTXDesc PTXMCAsmInfo.cpp ) -add_llvm_library_dependencies(LLVMPTXDesc - LLVMMC - LLVMPTXAsmPrinter - LLVMPTXInfo - LLVMSupport - ) - add_dependencies(LLVMPTXDesc PTXCommonTableGen) diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt index fff21c1..19b80c5 100644 --- a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PTXDesc parent = PTX required_libraries = MC PTXAsmPrinter PTXInfo Support add_to_library_groups = PTX - diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h index c6094be..77a298d 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h +++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h @@ -17,6 +17,8 @@ 
#ifndef PTXBASEINFO_H #define PTXBASEINFO_H +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "PTXMCTargetDesc.h" namespace llvm { @@ -57,6 +59,75 @@ namespace llvm { RndPosInfInt = 10 // .rpi }; } // namespace PTXII + + namespace PTXRegisterType { + // Register type encoded in MCOperands + enum { + Pred = 0, + B16, + B32, + B64, + F32, + F64 + }; + } // namespace PTXRegisterType + + namespace PTXRegisterSpace { + // Register space encoded in MCOperands + enum { + Reg = 0, + Local, + Param, + Argument, + Return + }; + } + + inline static void decodeRegisterName(raw_ostream &OS, + unsigned EncodedReg) { + OS << "%"; + + unsigned RegSpace = EncodedReg & 0x7; + unsigned RegType = (EncodedReg >> 3) & 0x7; + unsigned RegOffset = EncodedReg >> 6; + + switch (RegSpace) { + default: + llvm_unreachable("Unknown register space!"); + case PTXRegisterSpace::Reg: + switch (RegType) { + default: + llvm_unreachable("Unknown register type!"); + case PTXRegisterType::Pred: + OS << "p"; + break; + case PTXRegisterType::B16: + OS << "rh"; + break; + case PTXRegisterType::B32: + OS << "r"; + break; + case PTXRegisterType::B64: + OS << "rd"; + break; + case PTXRegisterType::F32: + OS << "f"; + break; + case PTXRegisterType::F64: + OS << "fd"; + break; + } + break; + case PTXRegisterSpace::Return: + OS << "ret"; + break; + case PTXRegisterSpace::Argument: + OS << "arg"; + break; + } + + OS << RegOffset; + } } // namespace llvm #endif diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index bdf238b..77ed71d 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -51,23 +51,23 @@ using namespace llvm; static const char PARAM_PREFIX[] = "__param_"; static const char RETURN_PREFIX[] = "__ret_"; -static const char *getRegisterTypeName(unsigned RegNo, - const MachineRegisterInfo& MRI) { - const TargetRegisterClass *TRC = MRI.getRegClass(RegNo); - -#define TEST_REGCLS(cls, clsstr) \ - if (PTX::cls ## RegisterClass == TRC) return # clsstr; - - TEST_REGCLS(RegPred, pred); - TEST_REGCLS(RegI16, b16); - TEST_REGCLS(RegI32, b32); - TEST_REGCLS(RegI64, b64); - TEST_REGCLS(RegF32, b32); - TEST_REGCLS(RegF64, b64); -#undef TEST_REGCLS - - llvm_unreachable("Not in any register class!"); - return NULL; +static const char *getRegisterTypeName(unsigned RegType) { + switch (RegType) { + default: + llvm_unreachable("Unknown register type"); + case PTXRegisterType::Pred: + return ".pred"; + case PTXRegisterType::B16: + return ".b16"; + case PTXRegisterType::B32: + return ".b32"; + case PTXRegisterType::B64: + return ".b64"; + case PTXRegisterType::F32: + return ".f32"; + case PTXRegisterType::F64: + return ".f64"; + } } static const char *getStateSpaceName(unsigned addressSpace) { @@ -188,32 +188,32 @@ void PTXAsmPrinter::EmitFunctionBodyStart() { unsigned numRegs; // pred - numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .pred %p<" << numRegs << ">;\n"; // i16 - numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .b16 %rh<" << numRegs << ">;\n"; // i32 - numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .b32 %r<" << numRegs << ">;\n"; // i64 - numRegs = 
MFI->getNumRegistersForClass(PTX::RegI64RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .b64 %rd<" << numRegs << ">;\n"; // f32 - numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .f32 %f<" << numRegs << ">;\n"; // f64 - numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .f64 %fd<" << numRegs << ">;\n"; @@ -368,7 +368,6 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { const PTXParamManager &PM = MFI->getParamManager(); const bool isKernel = MFI->isKernel(); const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); - const MachineRegisterInfo& MRI = MF->getRegInfo(); SmallString<128> decl; raw_svector_ostream os(decl); @@ -391,7 +390,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { if (i != b) os << ", "; - os << ".reg ." << getRegisterTypeName(*i, MRI) << ' ' + os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' << MFI->getRegisterName(*i); } } @@ -450,7 +449,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { if (i != b) os << ", "; - os << ".reg ." << getRegisterTypeName(*i, MRI) << ' ' + os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' << MFI->getRegisterName(*i); } } @@ -521,20 +520,18 @@ MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) { MCOperand MCOp; const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); - const MCExpr *Expr; - const char *RegSymbolName; + unsigned EncodedReg; switch (MO.getType()) { default: llvm_unreachable("Unknown operand type"); case MachineOperand::MO_Register: - // We create register operands as symbols, since the PTXInstPrinter class - // has no way to map virtual registers back to a name without some ugly - // hacks. - // FIXME: Figure out a better way to handle virtual register naming. - RegSymbolName = MFI->getRegisterName(MO.getReg()); - Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None, - OutContext); - MCOp = MCOperand::CreateExpr(Expr); + if (MO.getReg() > 0) { + // Encode the register + EncodedReg = MFI->getEncodedRegister(MO.getReg()); + } else { + EncodedReg = 0; + } + MCOp = MCOperand::CreateReg(EncodedReg); break; case MachineOperand::MO_Immediate: MCOp = MCOperand::CreateImm(MO.getImm()); diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp index 0b653e0..a21d172 100644 --- a/lib/Target/PTX/PTXFPRoundingModePass.cpp +++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp @@ -23,9 +23,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; + // NOTE: PTXFPRoundingModePass should be executed just before emission. -namespace llvm { +namespace { /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to /// all FP instructions. 
Essentially, this pass just looks for all FP /// instructions that have a rounding mode set to RndDefault, and sets an @@ -58,7 +60,7 @@ namespace llvm { void initializeMap(); void processInstruction(MachineInstr &MI); }; // class PTXFPRoundingModePass -} // namespace llvm +} // end anonymous namespace using namespace llvm; diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index 17191fb..a012297 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -243,6 +243,30 @@ SDValue PTXTargetLowering:: for (unsigned i = 0, e = Ins.size(); i != e; ++i) { EVT RegVT = Ins[i].VT; TargetRegisterClass* TRC = getRegClassFor(RegVT); + unsigned RegType; + + // Determine which register class we need + if (RegVT == MVT::i1) { + RegType = PTXRegisterType::Pred; + } + else if (RegVT == MVT::i16) { + RegType = PTXRegisterType::B16; + } + else if (RegVT == MVT::i32) { + RegType = PTXRegisterType::B32; + } + else if (RegVT == MVT::i64) { + RegType = PTXRegisterType::B64; + } + else if (RegVT == MVT::f32) { + RegType = PTXRegisterType::F32; + } + else if (RegVT == MVT::f64) { + RegType = PTXRegisterType::F64; + } + else { + llvm_unreachable("Unknown parameter type"); + } // Use a unique index in the instruction to prevent instruction folding. // Yes, this is a hack. @@ -253,7 +277,7 @@ SDValue PTXTargetLowering:: InVals.push_back(ArgValue); - MFI->addArgReg(Reg); + MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument); } } @@ -304,25 +328,32 @@ SDValue PTXTargetLowering:: for (unsigned i = 0, e = Outs.size(); i != e; ++i) { EVT RegVT = Outs[i].VT; TargetRegisterClass* TRC = 0; + unsigned RegType; // Determine which register class we need if (RegVT == MVT::i1) { TRC = PTX::RegPredRegisterClass; + RegType = PTXRegisterType::Pred; } else if (RegVT == MVT::i16) { TRC = PTX::RegI16RegisterClass; + RegType = PTXRegisterType::B16; } else if (RegVT == MVT::i32) { TRC = PTX::RegI32RegisterClass; + RegType = PTXRegisterType::B32; } else if (RegVT == MVT::i64) { TRC = PTX::RegI64RegisterClass; + RegType = PTXRegisterType::B64; } else if (RegVT == MVT::f32) { TRC = PTX::RegF32RegisterClass; + RegType = PTXRegisterType::F32; } else if (RegVT == MVT::f64) { TRC = PTX::RegF64RegisterClass; + RegType = PTXRegisterType::F64; } else { llvm_unreachable("Unknown parameter type"); @@ -335,7 +366,7 @@ SDValue PTXTargetLowering:: Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg); - MFI->addRetReg(Reg); + MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return); } } diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 1b947a5..871b3a7 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -116,7 +116,7 @@ bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const { } bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - return !isPredicated(MI) && get(MI->getOpcode()).isTerminator(); + return !isPredicated(MI) && MI->isTerminator(); } bool PTXInstrInfo:: @@ -184,15 +184,13 @@ AnalyzeBranch(MachineBasicBlock &MBB, if (MBB.empty()) return true; - MachineBasicBlock::const_iterator iter = MBB.end(); + MachineBasicBlock::iterator iter = MBB.end(); const MachineInstr& instLast1 = *--iter; - const MCInstrDesc &desc1 = instLast1.getDesc(); // for special case that MBB has only 1 instruction const bool IsSizeOne = MBB.size() == 1; // if IsSizeOne is true, *--iter and instLast2 are invalid // we put a dummy value in instLast2 and desc2 since they are used const 
MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter; - const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc(); DEBUG(dbgs() << "\n"); DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n"); @@ -207,7 +205,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, } // this block ends with only an unconditional branch - if (desc1.isUnconditionalBranch() && + if (instLast1.isUnconditionalBranch() && // when IsSizeOne is true, it "absorbs" the evaluation of instLast2 (IsSizeOne || !IsAnyKindOfBranch(instLast2))) { DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n"); @@ -217,7 +215,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, // this block ends with a conditional branch and // it falls through to a successor block - if (desc1.isConditionalBranch() && + if (instLast1.isConditionalBranch() && IsAnySuccessorAlsoLayoutSuccessor(MBB)) { DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n"); TBB = GetBranchTarget(instLast1); @@ -233,8 +231,8 @@ AnalyzeBranch(MachineBasicBlock &MBB, // this block ends with a conditional branch // followed by an unconditional branch - if (desc2.isConditionalBranch() && - desc1.isUnconditionalBranch()) { + if (instLast2.isConditionalBranch() && + instLast1.isUnconditionalBranch()) { DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n"); TBB = GetBranchTarget(instLast2); FBB = GetBranchTarget(instLast1); @@ -341,8 +339,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { } bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { - const MCInstrDesc &desc = inst.getDesc(); - return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch(); + return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch(); } bool PTXInstrInfo:: diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index bcd5bcf..19a862f 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -825,17 +825,17 @@ let hasSideEffects = 1 in { ///===- Parameter Passing Pseudo-Instructions -----------------------------===// def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b), - "mov.pred\t$a, %param$b", []>; + "mov.pred\t$a, %arg$b", []>; def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b), - "mov.b16\t$a, %param$b", []>; + "mov.b16\t$a, %arg$b", []>; def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b), - "mov.b32\t$a, %param$b", []>; + "mov.b32\t$a, %arg$b", []>; def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b), - "mov.b64\t$a, %param$b", []>; + "mov.b64\t$a, %arg$b", []>; def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b), - "mov.f32\t$a, %param$b", []>; + "mov.f32\t$a, %arg$b", []>; def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b), - "mov.f64\t$a, %param$b", []>; + "mov.f64\t$a, %arg$b", []>; def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>; def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>; diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp index b33a273..26ec623 100644 --- a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -22,9 +22,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; + // NOTE: PTXMFInfoExtract must after register allocation! 
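A pattern repeated throughout this patch (PTXInstrInfo above, and the ARM and PowerPC files elsewhere in the diff) is replacing queries through a cached MCInstrDesc with the equivalent MachineInstr helpers, e.g. desc1.isUnconditionalBranch() becomes instLast1.isUnconditionalBranch() and inst.getDesc().isTerminator() becomes inst.isTerminator(). A small before/after sketch; it assumes an in-tree LLVM build of this vintage and is illustrative only:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;

// Old style: fetch the static MCInstrDesc and query it.
static bool isAnyKindOfBranchOld(const MachineInstr &MI) {
  const MCInstrDesc &Desc = MI.getDesc();
  return Desc.isTerminator() || Desc.isBranch() || Desc.isIndirectBranch();
}

// New style: the MachineInstr forwarding helpers, as used by IsAnyKindOfBranch above.
static bool isAnyKindOfBranchNew(const MachineInstr &MI) {
  return MI.isTerminator() || MI.isBranch() || MI.isIndirectBranch();
}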
-namespace llvm { +namespace { /// PTXMFInfoExtract - PTX specific code to extract of PTX machine /// function information for PTXAsmPrinter /// @@ -42,7 +44,7 @@ namespace llvm { return "PTX Machine Function Info Extractor"; } }; // class PTXMFInfoExtract -} // namespace llvm +} // end anonymous namespace using namespace llvm; @@ -56,7 +58,20 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); const TargetRegisterClass *TRC = MRI.getRegClass(Reg); - MFI->addVirtualRegister(TRC, Reg); + unsigned RegType; + if (TRC == PTX::RegPredRegisterClass) + RegType = PTXRegisterType::Pred; + else if (TRC == PTX::RegI16RegisterClass) + RegType = PTXRegisterType::B16; + else if (TRC == PTX::RegI32RegisterClass) + RegType = PTXRegisterType::B32; + else if (TRC == PTX::RegI64RegisterClass) + RegType = PTXRegisterType::B64; + else if (TRC == PTX::RegF32RegisterClass) + RegType = PTXRegisterType::F32; + else if (TRC == PTX::RegF64RegisterClass) + RegType = PTXRegisterType::F64; + MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg); } return false; diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h index 3b985f7..1a2878c 100644 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -35,15 +35,22 @@ private: DenseSet<unsigned> RegArgs; DenseSet<unsigned> RegRets; - typedef std::vector<unsigned> RegisterList; - typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap; - typedef DenseMap<unsigned, std::string> RegisterNameMap; typedef DenseMap<int, std::string> FrameMap; - RegisterMap UsedRegs; - RegisterNameMap RegNames; FrameMap FrameSymbols; + struct RegisterInfo { + unsigned Reg; + unsigned Type; + unsigned Space; + unsigned Offset; + unsigned Encoded; + }; + + typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap; + + RegisterInfoMap RegInfo; + PTXParamManager ParamManager; public: @@ -51,13 +58,7 @@ public: PTXMachineFunctionInfo(MachineFunction &MF) : IsKernel(false) { - UsedRegs[PTX::RegPredRegisterClass] = RegisterList(); - UsedRegs[PTX::RegI16RegisterClass] = RegisterList(); - UsedRegs[PTX::RegI32RegisterClass] = RegisterList(); - UsedRegs[PTX::RegI64RegisterClass] = RegisterList(); - UsedRegs[PTX::RegF32RegisterClass] = RegisterList(); - UsedRegs[PTX::RegF64RegisterClass] = RegisterList(); - } + } /// getParamManager - Returns the PTXParamManager instance for this function. 
PTXParamManager& getParamManager() { return ParamManager; } @@ -78,69 +79,106 @@ public: reg_iterator retreg_begin() const { return RegRets.begin(); } reg_iterator retreg_end() const { return RegRets.end(); } + /// addRegister - Adds a virtual register to the set of all used registers + void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) { + if (!RegInfo.count(Reg)) { + RegisterInfo Info; + Info.Reg = Reg; + Info.Type = RegType; + Info.Space = RegSpace; + + // Determine register offset + Info.Offset = 0; + for(RegisterInfoMap::const_iterator i = RegInfo.begin(), + e = RegInfo.end(); i != e; ++i) { + const RegisterInfo& RI = i->second; + if (RI.Space == RegSpace) + if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type) + Info.Offset++; + } + + // Encode the register data into a single register number + Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space; + + RegInfo[Reg] = Info; + + if (RegSpace == PTXRegisterSpace::Argument) + RegArgs.insert(Reg); + else if (RegSpace == PTXRegisterSpace::Return) + RegRets.insert(Reg); + } + } + + /// countRegisters - Returns the number of registers of the given type and + /// space. + unsigned countRegisters(unsigned RegType, unsigned RegSpace) const { + unsigned Count = 0; + for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end(); + i != e; ++i) { + const RegisterInfo& RI = i->second; + if (RI.Type == RegType && RI.Space == RegSpace) + Count++; + } + return Count; + } + + /// getEncodedRegister - Returns the encoded value of the register. + unsigned getEncodedRegister(unsigned Reg) const { + return RegInfo.lookup(Reg).Encoded; + } + /// addRetReg - Adds a register to the set of return-value registers. void addRetReg(unsigned Reg) { if (!RegRets.count(Reg)) { RegRets.insert(Reg); - std::string name; - name = "%ret"; - name += utostr(RegRets.size() - 1); - RegNames[Reg] = name; } } /// addArgReg - Adds a register to the set of function argument registers. void addArgReg(unsigned Reg) { RegArgs.insert(Reg); - std::string name; - name = "%param"; - name += utostr(RegArgs.size() - 1); - RegNames[Reg] = name; - } - - /// addVirtualRegister - Adds a virtual register to the set of all used - /// registers in the function. - void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) { - std::string name; - - // Do not count registers that are argument/return registers. - if (!RegRets.count(Reg) && !RegArgs.count(Reg)) { - UsedRegs[TRC].push_back(Reg); - if (TRC == PTX::RegPredRegisterClass) - name = "%p"; - else if (TRC == PTX::RegI16RegisterClass) - name = "%rh"; - else if (TRC == PTX::RegI32RegisterClass) - name = "%r"; - else if (TRC == PTX::RegI64RegisterClass) - name = "%rd"; - else if (TRC == PTX::RegF32RegisterClass) - name = "%f"; - else if (TRC == PTX::RegF64RegisterClass) - name = "%fd"; - else - llvm_unreachable("Invalid register class"); - - name += utostr(UsedRegs[TRC].size() - 1); - RegNames[Reg] = name; - } } /// getRegisterName - Returns the name of the specified virtual register. This /// name is used during PTX emission. 
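The key change in PTXMachineFunctionInfo is that each virtual register's space, type, and per-class index are now packed into a single integer, Encoded = (Offset << 6) | (Type << 3) | Space, which travels inside the MCOperand, and decodeRegisterName() in PTXBaseInfo.h unpacks it back into names like %r3, %ret0 or %arg1. A standalone round-trip sketch of that packing (plain C++ mirroring the enums, not the LLVM code itself):

#include <cassert>
#include <cstdio>
#include <string>

// Mirrors PTXRegisterType / PTXRegisterSpace from PTXBaseInfo.h.
enum RegType  { Pred = 0, B16, B32, B64, F32, F64 };
enum RegSpace { Reg = 0, Local, Param, Argument, Return };

static unsigned encodeReg(unsigned Offset, unsigned Type, unsigned Space) {
  return (Offset << 6) | (Type << 3) | Space;   // same layout as RegisterInfo::Encoded
}

static std::string decodeReg(unsigned Encoded) {
  unsigned Space  = Encoded & 0x7;
  unsigned Type   = (Encoded >> 3) & 0x7;
  unsigned Offset = Encoded >> 6;
  static const char *const Prefix[] = {"p", "rh", "r", "rd", "f", "fd"};
  std::string Name = "%";
  if (Space == Reg)           Name += Prefix[Type];
  else if (Space == Return)   Name += "ret";
  else if (Space == Argument) Name += "arg";
  return Name + std::to_string(Offset);
}

int main() {
  assert(decodeReg(encodeReg(3, B32, Reg)) == "%r3");
  assert(decodeReg(encodeReg(0, Pred, Reg)) == "%p0");
  assert(decodeReg(encodeReg(1, F64, Argument)) == "%arg1");
  std::printf("all round-trips ok\n");
  return 0;
}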
- const char *getRegisterName(unsigned Reg) const { - if (RegNames.count(Reg)) - return RegNames.find(Reg)->second.c_str(); + std::string getRegisterName(unsigned Reg) const { + if (RegInfo.count(Reg)) { + const RegisterInfo& RI = RegInfo.lookup(Reg); + std::string Name; + raw_string_ostream NameStr(Name); + decodeRegisterName(NameStr, RI.Encoded); + NameStr.flush(); + return Name; + } else if (Reg == PTX::NoRegister) return "%noreg"; else llvm_unreachable("Register not in register name map"); } - /// getNumRegistersForClass - Returns the number of virtual registers that are - /// used for the specified register class. - unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const { - return UsedRegs.lookup(TRC).size(); + /// getEncodedRegisterName - Returns the name of the encoded register. + std::string getEncodedRegisterName(unsigned EncodedReg) const { + std::string Name; + raw_string_ostream NameStr(Name); + decodeRegisterName(NameStr, EncodedReg); + NameStr.flush(); + return Name; + } + + /// getRegisterType - Returns the type of the specified virtual register. + unsigned getRegisterType(unsigned Reg) const { + if (RegInfo.count(Reg)) + return RegInfo.lookup(Reg).Type; + else + llvm_unreachable("Unknown register"); + } + + /// getOffsetForRegister - Returns the offset of the virtual register + unsigned getOffsetForRegister(unsigned Reg) const { + if (RegInfo.count(Reg)) + return RegInfo.lookup(Reg).Offset; + else + return 0; } /// getFrameSymbol - Returns the symbol name for the given FrameIndex. @@ -148,13 +186,13 @@ public: if (FrameSymbols.count(FrameIndex)) { return FrameSymbols.lookup(FrameIndex).c_str(); } else { - std::string Name = "__local"; - Name += utostr(FrameIndex); + std::string Name = "__local"; + Name += utostr(FrameIndex); // The whole point of caching this name is to ensure the pointer we pass // to any getExternalSymbol() calls will remain valid for the lifetime of // the back-end instance. This is to work around an issue in SelectionDAG // where symbol names are expected to be life-long strings. - FrameSymbols[FrameIndex] = Name; + FrameSymbols[FrameIndex] = Name; return FrameSymbols[FrameIndex].c_str(); } } diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 292ea5e..4efdc27 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -67,30 +67,16 @@ namespace { "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; const char* DataLayout64 = "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; - - // Copied from LLVMTargetMachine.cpp - void printNoVerify(PassManagerBase &PM, const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); - } - - void printAndVerify(PassManagerBase &PM, - const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); - - //if (VerifyMachineCode) - // PM.add(createMachineVerifierPass(Banner)); - } } // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), DataLayout(is64Bit ? 
DataLayout64 : DataLayout32), Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), @@ -101,16 +87,18 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { + : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { + : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { } bool PTXTargetMachine::addInstSelector(PassManagerBase &PM) { diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 19f6c0f..22911f7 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -35,7 +35,7 @@ class PTXTargetMachine : public LLVMTargetMachine { public: PTXTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit); @@ -94,7 +94,7 @@ class PTX32TargetMachine : public PTXTargetMachine { public: PTX32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; // class PTX32TargetMachine @@ -103,7 +103,7 @@ class PTX64TargetMachine : public PTXTargetMachine { public: PTX64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; // class PTX32TargetMachine diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt index 2366e45..d9a5da3 100644 --- a/lib/Target/PTX/TargetInfo/CMakeLists.txt +++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMPTXInfo PTXTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMPTXInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMPTXInfo PTXCommonTableGen) diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt index 8e5285a..2cc30c4 100644 --- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PTXInfo parent = PTX required_libraries = MC Support Target add_to_library_groups = PTX - diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 05c1ffd..1b85495 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -27,20 +27,6 @@ add_llvm_target(PowerPCCodeGen PPCSelectionDAGInfo.cpp ) -add_llvm_library_dependencies(LLVMPowerPCCodeGen - LLVMAnalysis - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMPowerPCAsmPrinter - LLVMPowerPCDesc - LLVMPowerPCInfo - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt index 1d857e2..a605cc4 100644 --- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt +++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt @@ 
-4,9 +4,4 @@ add_llvm_library(LLVMPowerPCAsmPrinter PPCInstPrinter.cpp ) -add_llvm_library_dependencies(LLVMPowerPCAsmPrinter - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt index afbb2b1..7c691de 100644 --- a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt +++ b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PowerPCAsmPrinter parent = PowerPC required_libraries = MC Support add_to_library_groups = PowerPC - diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt index 5baa988..95fac54 100644 --- a/lib/Target/PowerPC/LLVMBuild.txt +++ b/lib/Target/PowerPC/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = PowerPC @@ -28,4 +31,3 @@ name = PowerPCCodeGen parent = PowerPC required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target add_to_library_groups = PowerPC - diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt index c4041db..febf438 100644 --- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -6,11 +6,4 @@ add_llvm_library(LLVMPowerPCDesc PPCPredicates.cpp ) -add_llvm_library_dependencies(LLVMPowerPCDesc - LLVMMC - LLVMPowerPCAsmPrinter - LLVMPowerPCInfo - LLVMSupport - ) - add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt index fc2da83..d3a567d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PowerPCDesc parent = PowerPC required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support add_to_library_groups = PowerPC - diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 9f2fd6d..34a5774 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -93,6 +93,16 @@ public: // FIXME. return false; } + + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // FIXME. + assert(0 && "RelaxInstruction() unimplemented"); + return false; + } + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { // FIXME. diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 56f622e..5dc2d3d 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -365,11 +365,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::MFCRpseud: + case PPC::MFCR8pseud: // Transform: %R3 = MFCRpseud %CR7 // Into: %R3 = MFCR ;; cr7 OutStreamer.AddComment(PPCInstPrinter:: getRegisterName(MI->getOperand(1).getReg())); - TmpInst.setOpcode(PPC::MFCR); + TmpInst.setOpcode(Subtarget.isPPC64() ? 
PPC::MFCR8 : PPC::MFCR); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); OutStreamer.EmitInstruction(TmpInst); return; @@ -441,7 +442,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { Directive = PPC::DIR_970; if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400) Directive = PPC::DIR_7400; - if (Subtarget.isPPC64() && Directive < PPC::DIR_970) + if (Subtarget.isPPC64() && Directive < PPC::DIR_64) Directive = PPC::DIR_64; assert(Directive <= PPC::DIR_64 && "Directive out of range."); diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 4a1f182..9d2f4d0 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -138,7 +138,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const { const MachineOperand &MO = MI.getOperand(OpNo); - assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && + assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || + MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); return 0x80 >> getPPCRegisterNumbering(MO.getReg()); } @@ -248,7 +249,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, if (MO.isReg()) { // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. // The GPR operand should come through here though. - assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || + assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && + MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); return getPPCRegisterNumbering(MO.getReg()); } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0b85fea..5c45018 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -64,7 +64,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) { // epilog blocks. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().getDesc().isReturn()) { + if (!I->empty() && I->back().isReturn()) { bool FoundIt = false; for (MBBI = I->end(); MBBI != I->begin(); ) { --MBBI; @@ -244,8 +244,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { if (MF.getFunction()->hasFnAttr(Attribute::Naked)) return false; - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || - (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects() || + (MF.getTarget().Options.GuaranteedTailCallOpt && + MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } @@ -655,7 +657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization - if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR && + if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR && MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); @@ -758,7 +760,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. 
int TCSPDelta = 0; - if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { + if (MF.getTarget().Options.GuaranteedTailCallOpt && + (TCSPDelta = FI->getTailCallSPDelta()) < 0) { MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } @@ -769,7 +772,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: doesn't detect whether or not we need to spill vXX, which requires // r0 for now. - if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable. + if (RegInfo->requiresRegisterScavenging(MF)) if (needsFP(MF) || spillsCR(MF)) { const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; @@ -863,7 +866,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Take into account stack space reserved for tail calls. int TCSPDelta = 0; - if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { + if (MF.getTarget().Options.GuaranteedTailCallOpt && + (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { LowerBound = TCSPDelta; } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 3197fc8..ae317af 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -27,7 +27,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { const MCInstrDesc *MCID = DAG->getInstrDesc(SU); if (!MCID) { // This is a PPC pseudo-instruction. - // FIXME: Should something else be done? return; } @@ -62,6 +61,7 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) : TII(tii) { + LastWasBL8_ELF = false; EndDispatchGroup(); } @@ -80,12 +80,6 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, bool &isFirst, bool &isSingle, bool &isCracked, bool &isLoad, bool &isStore) { - if ((int)Opcode >= 0) { - isFirst = isSingle = isCracked = isLoad = isStore = false; - return PPCII::PPC970_Pseudo; - } - Opcode = ~Opcode; - const MCInstrDesc &MCID = TII.get(Opcode); isLoad = MCID.mayLoad(); @@ -102,29 +96,23 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, /// isLoadOfStoredAddress - If we have a load from the previously stored pointer /// as indicated by StorePtr1/StorePtr2/StoreSize, return true. bool PPCHazardRecognizer970:: -isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { +isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, + const Value *LoadValue) const { for (unsigned i = 0, e = NumStores; i != e; ++i) { // Handle exact and commuted addresses. - if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i]) - return true; - if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i]) + if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i]) return true; // Okay, we don't have an exact match, if this is an indexed offset, see if // we have overlap (which happens during fp->int conversion for example). - if (StorePtr2[i] == Ptr2) { - if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i])) - if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) { - // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check - // to see if the load and store actually overlap. 
- int StoreOffs = StoreOffset->getZExtValue(); - int LoadOffs = LoadOffset->getZExtValue(); - if (StoreOffs < LoadOffs) { - if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true; - } else { - if (int(LoadOffs+LoadSize) > StoreOffs) return true; - } - } + if (StoreValue[i] == LoadValue) { + // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check + // to see if the load and store actually overlap. + if (StoreOffset[i] < LoadOffset) { + if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true; + } else { + if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true; + } } } return false; @@ -138,13 +126,26 @@ ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: getHazardType(SUnit *SU, int Stalls) { assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); - const SDNode *Node = SU->getNode()->getGluedMachineNode(); + MachineInstr *MI = SU->getInstr(); + + if (MI->isDebugValue()) + return NoHazard; + + unsigned Opcode = MI->getOpcode(); + + // If the last instruction was a BL8_ELF, then the NOP must follow it + // directly (this is strong requirement from the linker due to the ELF ABI). + // We return only Hazard (and not NoopHazard) because if the NOP is necessary + // then it will already be in the instruction stream (it is not always + // necessary; tail calls, for example, do not need it). + if (LastWasBL8_ELF && Opcode != PPC::NOP) + return Hazard; + bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = - GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + GetInstrType(Opcode, isFirst, isSingle, isCracked, isLoad, isStore); if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; - unsigned Opcode = Node->getMachineOpcode(); // We can only issue a PPC970_First/PPC970_Single instruction (such as // crand/mtspr/etc) if this is the first cycle of the dispatch group. @@ -181,55 +182,10 @@ getHazardType(SUnit *SU, int Stalls) { // If this is a load following a store, make sure it's not to the same or // overlapping address. 
- if (isLoad && NumStores) { - unsigned LoadSize; - switch (Opcode) { - default: llvm_unreachable("Unknown load!"); - case PPC::LBZ: case PPC::LBZU: - case PPC::LBZX: - case PPC::LBZ8: case PPC::LBZU8: - case PPC::LBZX8: - case PPC::LVEBX: - LoadSize = 1; - break; - case PPC::LHA: case PPC::LHAU: - case PPC::LHAX: - case PPC::LHZ: case PPC::LHZU: - case PPC::LHZX: - case PPC::LVEHX: - case PPC::LHBRX: - case PPC::LHA8: case PPC::LHAU8: - case PPC::LHAX8: - case PPC::LHZ8: case PPC::LHZU8: - case PPC::LHZX8: - LoadSize = 2; - break; - case PPC::LFS: case PPC::LFSU: - case PPC::LFSX: - case PPC::LWZ: case PPC::LWZU: - case PPC::LWZX: - case PPC::LWA: - case PPC::LWAX: - case PPC::LVEWX: - case PPC::LWBRX: - case PPC::LWZ8: - case PPC::LWZX8: - LoadSize = 4; - break; - case PPC::LFD: case PPC::LFDU: - case PPC::LFDX: - case PPC::LD: case PPC::LDU: - case PPC::LDX: - LoadSize = 8; - break; - case PPC::LVX: - case PPC::LVXL: - LoadSize = 16; - break; - } - - if (isLoadOfStoredAddress(LoadSize, - Node->getOperand(0), Node->getOperand(1))) + if (isLoad && NumStores && !MI->memoperands_empty()) { + MachineMemOperand *MO = *MI->memoperands_begin(); + if (isLoadOfStoredAddress(MO->getSize(), + MO->getOffset(), MO->getValue())) return NoopHazard; } @@ -237,66 +193,29 @@ getHazardType(SUnit *SU, int Stalls) { } void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { - const SDNode *Node = SU->getNode()->getGluedMachineNode(); + MachineInstr *MI = SU->getInstr(); + + if (MI->isDebugValue()) + return; + + unsigned Opcode = MI->getOpcode(); + LastWasBL8_ELF = (Opcode == PPC::BL8_ELF); + bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = - GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + GetInstrType(Opcode, isFirst, isSingle, isCracked, isLoad, isStore); if (InstrType == PPCII::PPC970_Pseudo) return; - unsigned Opcode = Node->getMachineOpcode(); // Update structural hazard information. if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; // Track the address stored to. 
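With the PPC970 hazard recognizer now running on MachineInstrs after register allocation, the size, offset, and base IR Value of a memory access come from the instruction's first MachineMemOperand instead of the hand-maintained opcode-to-size switches being deleted here, and the store/load conflict test in isLoadOfStoredAddress() reduces to an interval-overlap check on accesses that share a base Value. A standalone sketch of that overlap test (same arithmetic, plain C++):

#include <cassert>
#include <cstdint>

// Two accesses to the same base Value conflict iff their [off, off+size)
// intervals intersect; this mirrors the new body of isLoadOfStoredAddress().
static bool overlaps(int64_t LoadOff, uint64_t LoadSize,
                     int64_t StoreOff, uint64_t StoreSize) {
  if (StoreOff < LoadOff)
    return int64_t(StoreOff + StoreSize) > LoadOff;
  return int64_t(LoadOff + LoadSize) > StoreOff;
}

int main() {
  assert(overlaps(4, 4, 0, 8));    // store [0,8) covers load [4,8)
  assert(!overlaps(8, 4, 0, 8));   // store ends exactly where the load begins
  assert(overlaps(0, 8, 4, 4));    // commuted case: load covers the store
  return 0;
}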
- if (isStore) { - unsigned ThisStoreSize; - switch (Opcode) { - default: llvm_unreachable("Unknown store instruction!"); - case PPC::STB: case PPC::STB8: - case PPC::STBU: case PPC::STBU8: - case PPC::STBX: case PPC::STBX8: - case PPC::STVEBX: - ThisStoreSize = 1; - break; - case PPC::STH: case PPC::STH8: - case PPC::STHU: case PPC::STHU8: - case PPC::STHX: case PPC::STHX8: - case PPC::STVEHX: - case PPC::STHBRX: - ThisStoreSize = 2; - break; - case PPC::STFS: - case PPC::STFSU: - case PPC::STFSX: - case PPC::STWX: case PPC::STWX8: - case PPC::STWUX: - case PPC::STW: case PPC::STW8: - case PPC::STWU: - case PPC::STVEWX: - case PPC::STFIWX: - case PPC::STWBRX: - ThisStoreSize = 4; - break; - case PPC::STD_32: - case PPC::STDX_32: - case PPC::STD: - case PPC::STDU: - case PPC::STFD: - case PPC::STFDX: - case PPC::STDX: - case PPC::STDUX: - ThisStoreSize = 8; - break; - case PPC::STVX: - case PPC::STVXL: - ThisStoreSize = 16; - break; - } - - StoreSize[NumStores] = ThisStoreSize; - StorePtr1[NumStores] = Node->getOperand(1); - StorePtr2[NumStores] = Node->getOperand(2); + if (isStore && NumStores < 4 && !MI->memoperands_empty()) { + MachineMemOperand *MO = *MI->memoperands_begin(); + StoreSize[NumStores] = MO->getSize(); + StoreOffset[NumStores] = MO->getOffset(); + StoreValue[NumStores] = MO->getValue(); ++NumStores; } @@ -319,3 +238,9 @@ void PPCHazardRecognizer970::AdvanceCycle() { if (NumIssued == 5) EndDispatchGroup(); } + +void PPCHazardRecognizer970::Reset() { + LastWasBL8_ELF = false; + EndDispatchGroup(); +} + diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 32fac91..95d0d64 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -49,14 +49,18 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { // HasCTRSet - If the CTR register is set in this group, disallow BCTRL. bool HasCTRSet; + // Was the last instruction issued a BL8_ELF + bool LastWasBL8_ELF; + // StoredPtr - Keep track of the address of any store. If we see a load from // the same address (or one that aliases it), disallow the store. We can have // up to four stores in one dispatch group, hence we track up to 4. // // This is null if we haven't seen a store yet. We keep track of both // operands of the store here, since we support [r+r] and [r+i] addressing. - SDValue StorePtr1[4], StorePtr2[4]; - unsigned StoreSize[4]; + const Value *StoreValue[4]; + int64_t StoreOffset[4]; + uint64_t StoreSize[4]; unsigned NumStores; public: @@ -64,6 +68,7 @@ public: virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); + virtual void Reset(); private: /// EndDispatchGroup - Called when we are finishing a new dispatch group. @@ -76,8 +81,8 @@ private: bool &isFirst, bool &isSingle,bool &isCracked, bool &isLoad, bool &isStore); - bool isLoadOfStoredAddress(unsigned LoadSize, - SDValue Ptr1, SDValue Ptr2) const; + bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, + const Value *LoadValue) const; }; } // end namespace llvm diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3dee406..4a509a3 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -210,13 +210,13 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Find all return blocks, outputting a restore in each epilog. 
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - if (!BB->empty() && BB->back().getDesc().isReturn()) { + if (!BB->empty() && BB->back().isReturn()) { IP = BB->end(); --IP; // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = IP; - while (I2 != BB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != BB->begin() && (--I2)->isTerminator()) IP = I2; // Emit: MTVRSAVE InVRSave @@ -1066,7 +1066,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Target = N->getOperand(1); unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; - Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target, + Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, Chain), 0); return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 36d5c41..f3a3d17 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -103,6 +103,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); + // We do not currently implment this libm ops for PowerPC. + setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); + setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); + setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); + setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); + // PowerPC has no SREM/UREM instructions setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); @@ -146,9 +153,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BSWAP, MVT::i32 , Expand); setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTPOP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); @@ -332,7 +343,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); } // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle @@ -1667,7 +1680,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && + (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 4; // Assign locations to all of the incoming arguments. 
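The PPCISelLowering edits above are the consumer side of the TargetOptions move: instead of reading the global GuaranteedTailCallOpt flag, lowering code now asks the owning target machine, e.g. getTargetMachine().Options.GuaranteedTailCallOpt inside a TargetLowering method, or MF.getTarget().Options.DisableFramePointerElim(MF) when a MachineFunction is at hand. A hedged sketch of the query pattern (assumes in-tree LLVM headers of this vintage, illustrative only):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// Reads the per-target-machine option rather than a global cl::opt flag.
static bool wantsGuaranteedTailCalls(const MachineFunction &MF) {
  return MF.getTarget().Options.GuaranteedTailCallOpt;
}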
@@ -1857,7 +1871,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && + (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); @@ -2263,9 +2278,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); // Tail call needs the stack to be aligned. - if (CC==CallingConv::Fast && GuaranteedTailCallOpt) { - unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> - getStackAlignment(); + if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ + unsigned TargetAlign = DAG.getMachineFunction().getTarget(). + getFrameLowering()->getStackAlignment(); unsigned AlignMask = TargetAlign-1; NumBytes = (NumBytes + AlignMask) & ~AlignMask; } @@ -2299,7 +2314,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { - if (!GuaranteedTailCallOpt) + if (!getTargetMachine().Options.GuaranteedTailCallOpt) return false; // Variable argument functions are not supported. @@ -2752,7 +2767,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. int BytesCalleePops = - (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0; + (CallConv == CallingConv::Fast && + getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; if (InFlag.getNode()) Ops.push_back(InFlag); @@ -2868,7 +2884,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage @@ -3075,7 +3092,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); unsigned nAltivecParamsAtEnd = 0; @@ -5754,7 +5772,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && + bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects()) && MFI->getStackSize() && !MF.getFunction()->hasFnAttr(Attribute::Naked); unsigned FrameReg = isPPC64 ? (is31 ? 
PPC::X31 : PPC::X1) : diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index e88ad37..cdbc264 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -223,6 +223,18 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; +// 64-bit CR instructions +def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), + "mtcrf $FXM, $rS", BrMCRX>, + PPC970_MicroCode, PPC970_Unit_CRU; + +def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), + "", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; + +def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), + "mfcr $rT", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -469,6 +481,12 @@ def RLDICR : MDForm_1<30, 1, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), "rldicr $rA, $rS, $SH, $ME", IntRotateD, []>, isPPC64; + +def RLWINM8 : MForm_2<21, + (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, + []>; + } // End FXU Operations. diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index b9a6297..6d16f1d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -33,8 +33,8 @@ #include "PPCGenInstrInfo.inc" namespace llvm { -extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. -extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. +extern cl::opt<bool> DisablePPC32RS; +extern cl::opt<bool> DisablePPC64RS; } using namespace llvm; @@ -48,25 +48,32 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { - // Should use subtarget info to pick the right hazard recognizer. For - // now, always return a PPC970 recognizer. - const TargetInstrInfo *TII = TM->getInstrInfo(); - (void)TII; - assert(TII && "No InstrInfo?"); - unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); if (Directive == PPC::DIR_440) { const InstrItineraryData *II = TM->getInstrItineraryData(); return new PPCHazardRecognizer440(II, DAG); } - else { - // Disable the hazard recognizer for now, as it doesn't support - // bottom-up scheduling. - //return new PPCHazardRecognizer970(*TII); - return new ScheduleHazardRecognizer(); - } + + return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); } +/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer +/// to use for this target when scheduling the DAG. +ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( + const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + + // Most subtargets use a PPC970 recognizer.
+ if (Directive != PPC::DIR_440) { + const TargetInstrInfo *TII = TM.getInstrInfo(); + assert(TII && "No InstrInfo?"); + + return new PPCHazardRecognizer970(*TII); + } + + return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); +} unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { @@ -338,6 +345,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } +// This function returns true if a CR spill is necessary and false otherwise. bool PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, @@ -369,7 +377,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); } else { // FIXME: this spills LR immediately to memory in one step. To do this, - // we use R11, which we know cannot be used in the prolog/epilog. This is + // we use X11, which we know cannot be used in the prolog/epilog. This is // a hack. NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11)); NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) @@ -388,9 +396,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { - if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - // FIXME (64-bit): Enable + if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || + (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) .addReg(SrcReg, getKillRegState(isKill)), @@ -403,11 +410,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // We hack this on Darwin by reserving R2. It's probably broken on Linux // at the moment. + bool is64Bit = TM.getSubtargetImpl()->isPPC64(); // We need to store the CR in the low 4-bits of the saved value. First, // issue a MFCR to save all of the CRBits. unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg) + (is64Bit ? PPC::X2 : PPC::R2) : + (is64Bit ? PPC::X0 : PPC::R0); + NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : + PPC::MFCRpseud), ScratchReg) .addReg(SrcReg, getKillRegState(isKill))); // If the saved register wasn't CR0, shift the bits left so that they are @@ -415,12 +425,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, if (SrcReg != PPC::CR0) { unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; // rlwinm scratch, scratch, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 : + PPC::RLWINM), ScratchReg) .addReg(ScratchReg).addImm(ShiftBits) .addImm(0).addImm(31)); } - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? 
+ PPC::STW8 : PPC::STW)) .addReg(ScratchReg, getKillRegState(isKill)), FrameIdx)); @@ -504,7 +516,7 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, NewMIs.back()->addMemOperand(MF, MMO); } -void +bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, @@ -524,8 +536,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, FrameIdx)); } else { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), - PPC::R11), FrameIdx)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11)); + PPC::X11), FrameIdx)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11)); } } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), @@ -534,28 +546,37 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); + if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || + (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg) + , FrameIdx)); + return true; + } else { + // FIXME: We need a scatch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + ScratchReg), FrameIdx)); + + // If the reloaded register isn't CR0, shift the bits right so that they are + // in the right CR's slot. + if (DestReg != PPC::CR0) { + unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + // rlwinm r11, r11, 32-ShiftBits, 0, 31. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) + .addImm(31)); + } + + NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ? 
+ PPC::MTCRF8 : PPC::MTCRF), DestReg) + .addReg(ScratchReg)); } - - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) - .addReg(ScratchReg)); } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { unsigned Reg = 0; @@ -600,6 +621,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, } else { llvm_unreachable("Unknown regclass!"); } + + return false; } void @@ -612,7 +635,10 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setSpillsCR(); + } for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 90bacc9..e90f8cb 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -72,7 +72,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; - void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, + bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; @@ -88,6 +88,9 @@ public: ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetMachine *TM, const ScheduleDAG *DAG) const; + ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 17f63e0..d4c9d10 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -349,7 +349,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"!NoExcessFPPrecision">; +def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; @@ -399,7 +399,14 @@ let usesCustomInserter = 1, // Expanded after instruction selection. // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. -def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F), +let mayStore = 1 in +def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F), + "", []>; + +// RESTORE_CR - Indicate that we're restoring the CR register (previously +// spilled), so we'll need to scavenge a register for it. 
+let mayLoad = 1 in +def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), "", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { @@ -1091,7 +1098,7 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; -def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS), +def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 3ba9260..27f7f4a 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -46,15 +46,14 @@ #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -// FIXME (64-bit): Eventually enable by default. namespace llvm { -cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", +cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", cl::init(false), - cl::desc("Enable PPC32 register scavenger"), + cl::desc("Disable PPC32 register scavenger"), cl::Hidden); -cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger", +cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", cl::init(false), - cl::desc("Enable PPC64 register scavenger"), + cl::desc("Disable PPC64 register scavenger"), cl::Hidden); } @@ -63,8 +62,8 @@ using namespace llvm; // FIXME (64-bit): Should be inlined. bool PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((EnablePPC32RS && !Subtarget.isPPC64()) || - (EnablePPC64RS && Subtarget.isPPC64())); + return ((!DisablePPC32RS && !Subtarget.isPPC64()) || + (!DisablePPC64RS && Subtarget.isPPC64())); } PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, @@ -120,10 +119,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - PPC::LR, 0 }; @@ -149,10 +144,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - 0 }; // 64-bit Darwin calling convention. 
@@ -174,10 +165,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - PPC::LR8, 0 }; @@ -203,10 +190,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - 0 }; @@ -247,9 +230,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R13); Reserved.set(PPC::R31); - if (!requiresRegisterScavenging(MF)) - Reserved.set(PPC::R0); // FIXME (64-bit): Remove - Reserved.set(PPC::X0); Reserved.set(PPC::X1); Reserved.set(PPC::X13); @@ -259,7 +239,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // Reserve X2 on Darwin to hack around the problem of save/restore of CR // when the stack frame is too big to address directly; we need two regs. // This is a hack. if (Subtarget.isDarwinABI()) { @@ -291,6 +271,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case PPC::F4RCRegClassID: case PPC::VRRCRegClassID: return 32 - DefaultSafety; + case PPC::CRRCRegClassID: + return 8 - DefaultSafety; } } @@ -301,7 +283,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, void PPCRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { + if (MF.getTarget().Options.GuaranteedTailCallOpt && + I->getOpcode() == PPC::ADJCALLSTACKUP) { // Add (actually subtract) back the amount the callee popped on return. if (int CalleeAmt = I->getOperand(1).getImm()) { bool is64Bit = Subtarget.isPPC64(); @@ -476,28 +459,32 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const { // Get the instruction. - MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI> + MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset> // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); DebugLoc dl = MI.getDebugLoc(); - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; - unsigned Reg = findScratchRegister(II, RS, RC, SPAdj); - unsigned SrcReg = MI.getOperand(0).getReg(); + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + bool LP64 = Subtarget.isPPC64(); + unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : + (LP64 ? PPC::X0 : PPC::R0); + unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg. - BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg) + BuildMI(MBB, II, dl, TII.get(LP64 ? 
PPC::MFCR8pseud : PPC::MFCRpseud), Reg) .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. if (SrcReg != PPC::CR0) // rlwinm rA, rA, ShiftBits, 0, 31. - BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) .addReg(Reg, RegState::Kill) .addImm(getPPCRegisterNumbering(SrcReg) * 4) .addImm(0) @@ -511,6 +498,48 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, MBB.erase(II); } +void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + + bool LP64 = Subtarget.isPPC64(); + unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : + (LP64 ? PPC::X0 : PPC::R0); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_CR does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ), + Reg), FrameIndex); + + // If the reloaded register isn't CR0, shift the bits right so that they are + // in the right CR's slot. + if (DestReg != PPC::CR0) { + unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + // rlwinm r11, r11, 32-ShiftBits, 0, 31. + BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) + .addReg(Reg).addImm(32-ShiftBits).addImm(0) + .addImm(31); + } + + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg) + .addReg(Reg); + + // Discard the pseudo instruction. + MBB.erase(II); +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { @@ -556,16 +585,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - // Special case for pseudo-op SPILL_CR. - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default. + // Special case for pseudo-ops SPILL_CR and RESTORE_CR. + if (requiresRegisterScavenging(MF)) { if (OpC == PPC::SPILL_CR) { lowerCRSpilling(II, FrameIndex, SPAdj, RS); return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex, SPAdj, RS); + return; } + } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). + + bool is64Bit = Subtarget.isPPC64(); MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ? - PPC::R31 : PPC::R1, + (is64Bit ? PPC::X31 : PPC::R31) : + (is64Bit ? PPC::X1 : PPC::R1), false); // Figure out if the offset in the instruction is shifted right two bits. This @@ -611,19 +647,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - // FIXME: figure out what SPAdj is doing here. - // FIXME (64-bit): Use "findScratchRegister". 
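The rlwinm immediates used by lowerCRSpilling and lowerCRRestore above follow from the CR layout: MFCR places CR0 in the most significant four bits of the scratch GPR, so CRn's field sits 4*n bits lower, and a left rotate by 4*n (or by 32 - 4*n on the restore path) moves that field into or out of CR0's slot. A small self-contained sketch of that arithmetic (illustrative, not part of the patch; the CR value and field number are made up):

#include <cassert>
#include <cstdint>

// rlwinm rA, rS, SH, 0, 31 is a plain 32-bit rotate left by SH.
static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

int main() {
  const unsigned CRNum = 3;            // say the spilled register is CR3
  const uint32_t WholeCR = 0x000A0000; // CR3's four bits hold the value 0xA
  // Spill path: rotate left by 4*CRNum so CR3's field lands in CR0's slot
  // before the low word is stored to the stack slot.
  uint32_t Spilled = rotl32(WholeCR, 4 * CRNum);
  assert((Spilled >> 28) == 0xA);
  // Restore path: rotate left by 32 - 4*CRNum to move the field back before
  // mtcrf writes it into CR3.
  uint32_t Restored = rotl32(Spilled, 32 - 4 * CRNum);
  assert(Restored == WholeCR);
  return 0;
}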
unsigned SReg; - if (requiresRegisterScavenging(MF)) - SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj); - else - SReg = PPC::R0; + if (requiresRegisterScavenging(MF)) { + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); + } else + SReg = is64Bit ? PPC::X0 : PPC::R0; // Insert a set of rA with the full offset value before the ld, st, or add - BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg) .addImm(Offset >> 16); - BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg) .addReg(SReg, RegState::Kill) .addImm(Offset); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index f70a594..faf690f 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -57,6 +57,8 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; + void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 8acf75c..baa0eb5 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" +#include "PPCRegisterInfo.h" #include "PPC.h" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" @@ -140,3 +141,22 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || GV->hasCommonLinkage() || isDecl; } + +bool PPCSubtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + if (DarwinDirective == PPC::DIR_440) + return false; + + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + + if (isPPC64()) + CriticalPathRCs.push_back(&PPC::G8RCRegClass); + else + CriticalPathRCs.push_back(&PPC::GPRCRegClass); + + return OptLevel >= CodeGenOpt::Default; +} + diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index d2b853d..62b2424 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -148,6 +148,10 @@ public: bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } + /// enablePostRAScheduler - True at 'More' optimization. 
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index de8fca0..8e71c46 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -28,10 +28,11 @@ extern "C" void LLVMInitializePowerPCTarget() { PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), @@ -45,17 +46,19 @@ bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { + : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { + : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { } @@ -81,7 +84,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, if (Subtarget.isPPC64()) // Temporary workaround for the inability of PPC64 JIT to handle jump // tables. - DisableJumpTables = true; + Options.DisableJumpTables = true; // Inform the subtarget that we are in JIT mode. FIXME: does this break macho // writing? 
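The constructor changes just above (and the matching Sparc and X86 changes later in the patch) thread a caller-supplied TargetOptions into LLVMTargetMachine in place of the file-scope cl::opt globals that TargetMachine.cpp stops defining further down. A rough caller-side sketch, assuming the createTargetMachine factory gained the same TargetOptions parameter as these constructors; the triple, CPU, and option settings are placeholders:

#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

// Build a PPC target machine with explicitly chosen codegen options rather
// than relying on global command-line flags.
static TargetMachine *createPPCTargetMachine(const Target &T) {
  TargetOptions Options;
  Options.GuaranteedTailCallOpt = true;  // previously the -tailcallopt global
  Options.NoZerosInBSS = false;          // previously -nozero-initialized-in-bss
  return T.createTargetMachine("powerpc64-unknown-linux-gnu", "ppc64", "",
                               Options, Reloc::Default, CodeModel::Default,
                               CodeGenOpt::Default);
}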
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 03b27c6..0427876 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -41,7 +41,7 @@ class PPCTargetMachine : public LLVMTargetMachine { public: PPCTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit); @@ -79,7 +79,7 @@ public: class PPC32TargetMachine : public PPCTargetMachine { public: PPC32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -89,7 +89,7 @@ public: class PPC64TargetMachine : public PPCTargetMachine { public: PPC64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt index f63111f..fdb8a62 100644 --- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt +++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMPowerPCInfo PowerPCTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMPowerPCInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt index f51b417..f77d85b 100644 --- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PowerPCInfo parent = PowerPC required_libraries = MC Support Target add_to_library_groups = PowerPC - diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 9687951..56ee7c2 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -22,17 +22,5 @@ add_llvm_target(SparcCodeGen SparcSelectionDAGInfo.cpp ) -add_llvm_library_dependencies(LLVMSparcCodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSparcDesc - LLVMSparcInfo - LLVMSupport - LLVMTarget - ) - add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index dab35e5..9295408 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -100,7 +100,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->getDesc().hasDelaySlot()) { + if (I->hasDelaySlot()) { MachineBasicBlock::iterator D = MBB.end(); MachineBasicBlock::iterator J = I; @@ -149,7 +149,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB, } //Call's delay filler can def some of call's uses. 
- if (slot->getDesc().isCall()) + if (slot->isCall()) insertCallUses(slot, RegUses); else insertDefsUses(slot, RegDefs, RegUses); @@ -170,7 +170,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB, if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isLabel() - || I->getDesc().hasDelaySlot() + || I->hasDelaySlot() || isDelayFiller(MBB, I)) break; @@ -194,13 +194,13 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, if (candidate->isImplicitDef() || candidate->isKill()) return true; - if (candidate->getDesc().mayLoad()) { + if (candidate->mayLoad()) { sawLoad = true; if (sawStore) return true; } - if (candidate->getDesc().mayStore()) { + if (candidate->mayStore()) { if (sawStore) return true; sawStore = true; @@ -298,13 +298,13 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB, return false; if (candidate->getOpcode() == SP::UNIMP) return true; - const MCInstrDesc &prevdesc = (--candidate)->getDesc(); - return prevdesc.hasDelaySlot(); + --candidate; + return candidate->hasDelaySlot(); } bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) { - if (!I->getDesc().isCall()) + if (!I->isCall()) return false; unsigned structSizeOpNum = 0; diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt index 38c797f..fe20d2f 100644 --- a/lib/Target/Sparc/LLVMBuild.txt +++ b/lib/Target/Sparc/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = Sparc @@ -27,4 +30,3 @@ name = SparcCodeGen parent = Sparc required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target add_to_library_groups = Sparc - diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt index d3bdf0b..9d4db4d 100644 --- a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt @@ -3,10 +3,4 @@ add_llvm_library(LLVMSparcDesc SparcMCAsmInfo.cpp ) -add_llvm_library_dependencies(LLVMSparcDesc - LLVMMC - LLVMSparcInfo - LLVMSupport - ) - add_dependencies(LLVMSparcDesc SparcCommonTableGen) diff --git a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt index a339cec..97f8f16 100644 --- a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = SparcDesc parent = Sparc required_libraries = MC SparcInfo Support add_to_library_groups = Sparc - diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index deb39d9..7548bbf 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -236,9 +236,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // Check if the last terminator is an unconditional branch. 
MachineBasicBlock::const_iterator I = Pred->end(); - while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + while (I != Pred->begin() && !(--I)->isTerminator()) ; // Noop - return I == Pred->end() || !I->getDesc().isBarrier(); + return I == Pred->end() || !I->isBarrier(); } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 25104d1..3608d3b 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -763,7 +763,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTLZ , MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 7a6bf50..5290d42 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -133,7 +133,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, break; //Terminator is not a branch - if (!I->getDesc().isBranch()) + if (!I->isBranch()) return true; //Handle Unconditional branches @@ -195,7 +195,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode); BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA)) .addMBB(TargetBB); - MBB.addSuccessor(TargetBB); + OldInst->eraseFromParent(); UnCondBrIter->eraseFromParent(); diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 7dff799..8e16fd7 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -26,10 +26,11 @@ extern "C" void LLVMInitializeSparcTarget() { /// SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), DataLayout(Subtarget.getDataLayout()), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), @@ -52,16 +53,20 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM){ SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, Reloc::Model RM, + StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { + : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, Reloc::Model RM, + StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { + : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 63bfa5d..cedc1e3 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -34,9 +34,9 @@ class 
SparcTargetMachine : public LLVMTargetMachine { SparcFrameLowering FrameLowering; public: SparcTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool is64bit); + CodeGenOpt::Level OL, bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { @@ -65,6 +65,7 @@ class SparcV8TargetMachine : public SparcTargetMachine { public: SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -75,6 +76,7 @@ class SparcV9TargetMachine : public SparcTargetMachine { public: SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt index a076023..b0d031e 100644 --- a/lib/Target/Sparc/TargetInfo/CMakeLists.txt +++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMSparcInfo SparcTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMSparcInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMSparcInfo SparcCommonTableGen) diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt index 81c9032..b5c320f 100644 --- a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = SparcInfo parent = Sparc required_libraries = MC Support Target add_to_library_groups = Sparc - diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/Target/TargetFrameLowering.cpp deleted file mode 100644 index 122f869..0000000 --- a/lib/Target/TargetFrameLowering.cpp +++ /dev/null @@ -1,45 +0,0 @@ -//===----- TargetFrameLowering.cpp - Implement target frame interface ------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implements the layout of a stack frame on the target machine. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" - -#include <cstdlib> -using namespace llvm; - -TargetFrameLowering::~TargetFrameLowering() { -} - -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. This is the default implementation -/// which is overridden for some targets. -int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getObjectOffset(FI) + MFI->getStackSize() - - getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); -} - -int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, unsigned &FrameReg) const { - const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); - - // By default, assume all frame indices are referenced via whatever - // getFrameRegister() says. 
The target can override this if it's doing - // something different. - FrameReg = RI->getFrameRegister(MF); - return getFrameIndexOffset(MF, FI); -} diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index d52ecb3..440f9ad 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -13,7 +13,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/ErrorHandling.h" @@ -73,23 +72,6 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); } -int -TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const { - if (!ItinData || ItinData->isEmpty()) - return -1; - - if (!DefNode->isMachineOpcode()) - return -1; - - unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); - if (!UseNode->isMachineOpcode()) - return ItinData->getOperandCycle(DefClass, DefIdx); - unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); - return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); -} - int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -99,17 +81,6 @@ int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return ItinData->getStageLatency(MI->getDesc().getSchedClass()); } -int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - SDNode *N) const { - if (!ItinData || ItinData->isEmpty()) - return 1; - - if (!N->isMachineOpcode()) - return 1; - - return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); -} - bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx) const { @@ -129,19 +100,6 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, } -bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isTerminator()) return false; - - // Conditional branch is a special case. - if (MCID.isBranch() && !MCID.isBarrier()) - return true; - if (!MCID.isPredicable()) - return true; - return !isPredicated(MI); -} - - /// Measure the specified inline asm to determine an approximation of its /// length. 
/// Comments (which run till the next SeparatorString or newline) do not diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index aa2e014..768facb 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -22,15 +22,96 @@ char TargetLibraryInfo::ID = 0; const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { - "memset", + "acos", + "acosl", + "acosf", + "asin", + "asinl", + "asinf", + "atan", + "atanl", + "atanf", + "atan2", + "atan2l", + "atan2f", + "ceil", + "ceill", + "ceilf", + "copysign", + "copysignf", + "copysignl", + "cos", + "cosl", + "cosf", + "cosh", + "coshl", + "coshf", + "exp", + "expl", + "expf", + "exp2", + "exp2l", + "exp2f", + "expm1", + "expm1l", + "expm1f", + "fabs", + "fabsl", + "fabsf", + "floor", + "floorl", + "floorf", + "fiprintf", + "fmod", + "fmodl", + "fmodf", + "fputs", + "fwrite", + "iprintf", + "log", + "logl", + "logf", + "log2", + "log2l", + "log2f", + "log10", + "log10l", + "log10f", + "log1p", + "log1pl", + "log1pf", "memcpy", "memmove", + "memset", "memset_pattern16", - "iprintf", + "nearbyint", + "nearbyintf", + "nearbyintl", + "pow", + "powf", + "powl", + "rint", + "rintf", + "rintl", + "sin", + "sinl", + "sinf", + "sinh", + "sinhl", + "sinhf", "siprintf", - "fiprintf", - "fwrite", - "fputs" + "sqrt", + "sqrtl", + "sqrtf", + "tan", + "tanl", + "tanf", + "tanh", + "tanhl", + "tanhf", + "trunc", + "truncf", + "truncl" }; /// initialize - Initialize the set of available library functions based on the diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 56b7b69..fc8b67b 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -48,7 +48,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx, TargetLoweringObjectFile::~TargetLoweringObjectFile() { } -static bool isSuitableForBSS(const GlobalVariable *GV) { +static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) { const Constant *C = GV->getInitializer(); // Must have zero initializer. @@ -133,7 +133,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // Handle thread-local data first. if (GVar->isThreadLocal()) { - if (isSuitableForBSS(GVar)) + if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) return SectionKind::getThreadBSS(); return SectionKind::getThreadData(); } @@ -143,7 +143,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // Variable can be easily put to BSS section.
- if (isSuitableForBSS(GVar)) { + if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) { if (GVar->hasLocalLinkage()) return SectionKind::getBSSLocal(); else if (GVar->hasExternalLinkage()) diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 805e16e..fb7bbbb 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -24,153 +22,11 @@ using namespace llvm; // namespace llvm { - bool LessPreciseFPMADOption; - bool PrintMachineCode; - bool NoFramePointerElim; - bool NoFramePointerElimNonLeaf; - bool NoExcessFPPrecision; - bool UnsafeFPMath; - bool NoInfsFPMath; - bool NoNaNsFPMath; - bool HonorSignDependentRoundingFPMathOption; - bool UseSoftFloat; - FloatABI::ABIType FloatABIType; - bool NoImplicitFloat; - bool NoZerosInBSS; - bool JITExceptionHandling; - bool JITEmitDebugInfo; - bool JITEmitDebugInfoToDisk; - bool GuaranteedTailCallOpt; - unsigned StackAlignmentOverride; - bool RealignStack; - bool DisableJumpTables; bool StrongPHIElim; bool HasDivModLibcall; bool AsmVerbosityDefault(false); - bool EnableSegmentedStacks; } -static cl::opt<bool, true> -PrintCode("print-machineinstrs", - cl::desc("Print generated machine code"), - cl::location(PrintMachineCode), cl::init(false)); -static cl::opt<bool, true> -DisableFPElim("disable-fp-elim", - cl::desc("Disable frame pointer elimination optimization"), - cl::location(NoFramePointerElim), - cl::init(false)); -static cl::opt<bool, true> -DisableFPElimNonLeaf("disable-non-leaf-fp-elim", - cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"), - cl::location(NoFramePointerElimNonLeaf), - cl::init(false)); -static cl::opt<bool, true> -DisableExcessPrecision("disable-excess-fp-precision", - cl::desc("Disable optimizations that may increase FP precision"), - cl::location(NoExcessFPPrecision), - cl::init(false)); -static cl::opt<bool, true> -EnableFPMAD("enable-fp-mad", - cl::desc("Enable less precise MAD instructions to be generated"), - cl::location(LessPreciseFPMADOption), - cl::init(false)); -static cl::opt<bool, true> -EnableUnsafeFPMath("enable-unsafe-fp-math", - cl::desc("Enable optimizations that may decrease FP precision"), - cl::location(UnsafeFPMath), - cl::init(false)); -static cl::opt<bool, true> -EnableNoInfsFPMath("enable-no-infs-fp-math", - cl::desc("Enable FP math optimizations that assume no +-Infs"), - cl::location(NoInfsFPMath), - cl::init(false)); -static cl::opt<bool, true> -EnableNoNaNsFPMath("enable-no-nans-fp-math", - cl::desc("Enable FP math optimizations that assume no NaNs"), - cl::location(NoNaNsFPMath), - cl::init(false)); -static cl::opt<bool, true> -EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", - cl::Hidden, - cl::desc("Force codegen to assume rounding mode can change dynamically"), - cl::location(HonorSignDependentRoundingFPMathOption), - cl::init(false)); -static cl::opt<bool, true> -GenerateSoftFloatCalls("soft-float", - cl::desc("Generate software floating point library calls"), - cl::location(UseSoftFloat), - cl::init(false)); -static cl::opt<llvm::FloatABI::ABIType, true> -FloatABIForCalls("float-abi", - cl::desc("Choose float ABI type"), - cl::location(FloatABIType), - cl::init(FloatABI::Default), - cl::values( - 
clEnumValN(FloatABI::Default, "default", - "Target default float ABI type"), - clEnumValN(FloatABI::Soft, "soft", - "Soft float ABI (implied by -soft-float)"), - clEnumValN(FloatABI::Hard, "hard", - "Hard float ABI (uses FP registers)"), - clEnumValEnd)); -static cl::opt<bool, true> -DontPlaceZerosInBSS("nozero-initialized-in-bss", - cl::desc("Don't place zero-initialized symbols into bss section"), - cl::location(NoZerosInBSS), - cl::init(false)); -static cl::opt<bool, true> -EnableJITExceptionHandling("jit-enable-eh", - cl::desc("Emit exception handling information"), - cl::location(JITExceptionHandling), - cl::init(false)); -// In debug builds, make this default to true. -#ifdef NDEBUG -#define EMIT_DEBUG false -#else -#define EMIT_DEBUG true -#endif -static cl::opt<bool, true> -EmitJitDebugInfo("jit-emit-debug", - cl::desc("Emit debug information to debugger"), - cl::location(JITEmitDebugInfo), - cl::init(EMIT_DEBUG)); -#undef EMIT_DEBUG -static cl::opt<bool, true> -EmitJitDebugInfoToDisk("jit-emit-debug-to-disk", - cl::Hidden, - cl::desc("Emit debug info objfiles to disk"), - cl::location(JITEmitDebugInfoToDisk), - cl::init(false)); - -static cl::opt<bool, true> -EnableGuaranteedTailCallOpt("tailcallopt", - cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), - cl::location(GuaranteedTailCallOpt), - cl::init(false)); -static cl::opt<unsigned, true> -OverrideStackAlignment("stack-alignment", - cl::desc("Override default stack alignment"), - cl::location(StackAlignmentOverride), - cl::init(0)); -static cl::opt<bool, true> -EnableRealignStack("realign-stack", - cl::desc("Realign stack if needed"), - cl::location(RealignStack), - cl::init(true)); -static cl::opt<bool, true> -DisableSwitchTables(cl::Hidden, "disable-jump-tables", - cl::desc("Do not generate jump tables."), - cl::location(DisableJumpTables), - cl::init(false)); -static cl::opt<bool, true> -EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", - cl::desc("Use strong PHI elimination."), - cl::location(StrongPHIElim), - cl::init(false)); -static cl::opt<std::string> -TrapFuncName("trap-func", cl::Hidden, - cl::desc("Emit a call to trap function rather than a trap instruction"), - cl::init("")); static cl::opt<bool> DataSections("fdata-sections", cl::desc("Emit data into separate sections"), @@ -179,18 +35,14 @@ static cl::opt<bool> FunctionSections("ffunction-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); -static cl::opt<bool, true> -SegmentedStacks("segmented-stacks", - cl::desc("Use segmented stacks if possible."), - cl::location(EnableSegmentedStacks), - cl::init(false)); //--------------------------------------------------------------------------- // TargetMachine Class // TargetMachine::TargetMachine(const Target &T, - StringRef TT, StringRef CPU, StringRef FS) + StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options) : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), CodeGenInfo(0), AsmInfo(0), MCRelaxAll(false), @@ -198,11 +50,8 @@ TargetMachine::TargetMachine(const Target &T, MCSaveTempLabels(false), MCUseLoc(true), MCUseCFI(true), - MCUseDwarfDirectory(false) { - // Typically it will be subtargets that will adjust FloatABIType from Default - // to Soft or Hard. 
- if (UseSoftFloat) - FloatABIType = FloatABI::Soft; + MCUseDwarfDirectory(false), + Options(Options) { } TargetMachine::~TargetMachine() { @@ -258,36 +107,3 @@ void TargetMachine::setDataSections(bool V) { DataSections = V; } -namespace llvm { - /// DisableFramePointerElim - This returns true if frame pointer elimination - /// optimization should be disabled for the given machine function. - bool DisableFramePointerElim(const MachineFunction &MF) { - // Check to see if we should eliminate non-leaf frame pointers and then - // check to see if we should eliminate all frame pointers. - if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->hasCalls(); - } - - return NoFramePointerElim; - } - - /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option - /// is specified on the command line. When this flag is off(default), the - /// code generator is not allowed to generate mad (multiply add) if the - /// result is "less precise" than doing those operations individually. - bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; } - - /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume - /// that the rounding mode of the FPU can change from its default. - bool HonorSignDependentRoundingFPMath() { - return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; - } - - /// getTrapFunctionName - If this returns a non-empty string, this means isel - /// should lower Intrinsic::trap to a call to the specified function name - /// instead of an ISD::TRAP node. - StringRef getTrapFunctionName() { - return TrapFuncName; - } -} diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 67239b8..2689837 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -13,8 +13,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt index 94aca7a..47489bb 100644 --- a/lib/Target/X86/AsmParser/CMakeLists.txt +++ b/lib/Target/X86/AsmParser/CMakeLists.txt @@ -5,12 +5,4 @@ add_llvm_library(LLVMX86AsmParser X86AsmParser.cpp ) -add_llvm_library_dependencies(LLVMX86AsmParser - LLVMMC - LLVMMCParser - LLVMSupport - LLVMX86Desc - LLVMX86Info - ) - add_dependencies(LLVMX86AsmParser X86CommonTableGen) diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt index 6c2405a..9f94d5d 100644 --- a/lib/Target/X86/AsmParser/LLVMBuild.txt +++ b/lib/Target/X86/AsmParser/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86AsmParser parent = X86 required_libraries = MC MCParser Support X86Desc X86Info add_to_library_groups = X86 - diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 4542d4b..be15899 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -51,21 +51,6 @@ endif() add_llvm_target(X86CodeGen ${sources}) -add_llvm_library_dependencies(LLVMX86CodeGen - LLVMAnalysis - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - LLVMX86AsmPrinter - LLVMX86Desc - LLVMX86Info - LLVMX86Utils - ) - add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt 
b/lib/Target/X86/Disassembler/CMakeLists.txt index 4f570d5..0cd6db9 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -5,12 +5,6 @@ add_llvm_library(LLVMX86Disassembler X86DisassemblerDecoder.c ) -add_llvm_library_dependencies(LLVMX86Disassembler - LLVMMC - LLVMSupport - LLVMX86Info - ) - # workaround for hanging compilation on MSVC9 and 10 if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) set_property( diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt index cd748cf..cac7adf 100644 --- a/lib/Target/X86/Disassembler/LLVMBuild.txt +++ b/lib/Target/X86/Disassembler/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86Disassembler parent = X86 required_libraries = MC Support X86Info add_to_library_groups = X86 - diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt index 2a2b5db..28e2460 100644 --- a/lib/Target/X86/InstPrinter/CMakeLists.txt +++ b/lib/Target/X86/InstPrinter/CMakeLists.txt @@ -6,10 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter X86InstComments.cpp ) -add_llvm_library_dependencies(LLVMX86AsmPrinter - LLVMMC - LLVMSupport - LLVMX86Utils - ) - add_dependencies(LLVMX86AsmPrinter X86CommonTableGen) diff --git a/lib/Target/X86/InstPrinter/LLVMBuild.txt b/lib/Target/X86/InstPrinter/LLVMBuild.txt index fb01323..6868dde 100644 --- a/lib/Target/X86/InstPrinter/LLVMBuild.txt +++ b/lib/Target/X86/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86AsmPrinter parent = X86 required_libraries = MC Support X86Utils add_to_library_groups = X86 - diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 6e87efa..6e4b1b9 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -106,28 +106,92 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // FALL THROUGH. case X86::PUNPCKHBWrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(16, ShuffleMask); + DecodeUNPCKHMask(MVT::v16i8, ShuffleMask); + break; + case X86::VPUNPCKHBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHBWrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v16i8, ShuffleMask); + break; + case X86::VPUNPCKHBWYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHBWYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKHWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKHWDrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(8, ShuffleMask); + DecodeUNPCKHMask(MVT::v8i16, ShuffleMask); + break; + case X86::VPUNPCKHWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHWDrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v8i16, ShuffleMask); + break; + case X86::VPUNPCKHWDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::VPUNPCKHWDYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKHDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKHDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(4, ShuffleMask); + DecodeUNPCKHMask(MVT::v4i32, ShuffleMask); + break; + case X86::VPUNPCKHDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHDQrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v4i32, ShuffleMask); + break; + case X86::VPUNPCKHDQYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHDQYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKHQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKHQDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(2, ShuffleMask); + DecodeUNPCKHMask(MVT::v2i64, ShuffleMask); + break; + case X86::VPUNPCKHQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHQDQrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v2i64, ShuffleMask); + break; + case X86::VPUNPCKHQDQYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHQDQYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v4i64, ShuffleMask); break; case X86::PUNPCKLBWrr: @@ -135,42 +199,117 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // FALL THROUGH. case X86::PUNPCKLBWrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLBWMask(16, ShuffleMask); + DecodeUNPCKLMask(MVT::v16i8, ShuffleMask); + break; + case X86::VPUNPCKLBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLBWrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v16i8, ShuffleMask); + break; + case X86::VPUNPCKLBWYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLBWYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKLWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLWDrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLWDMask(8, ShuffleMask); + DecodeUNPCKLMask(MVT::v8i16, ShuffleMask); + break; + case X86::VPUNPCKLWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLWDrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v8i16, ShuffleMask); + break; + case X86::VPUNPCKLWDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::VPUNPCKLWDYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKLDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLDQMask(4, ShuffleMask); + DecodeUNPCKLMask(MVT::v4i32, ShuffleMask); + break; + case X86::VPUNPCKLDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLDQrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v4i32, ShuffleMask); + break; + case X86::VPUNPCKLDQYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLDQYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKLQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLQDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLQDQMask(2, ShuffleMask); + DecodeUNPCKLMask(MVT::v2i64, ShuffleMask); + break; + case X86::VPUNPCKLQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLQDQrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v2i64, ShuffleMask); + break; + case X86::VPUNPCKLQDQYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLQDQYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v4i64, ShuffleMask); break; case X86::SHUFPDrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::SHUFPDrmi: - DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VSHUFPDrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VSHUFPDrmi: - DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VSHUFPDYrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPDYrmi: + DecodeSHUFPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -179,14 +318,25 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::SHUFPSrmi: - DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VSHUFPSrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
case X86::VSHUFPSrmi: - DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VSHUFPSYrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPSYrmi: + DecodeSHUFPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -195,14 +345,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPDrm: - DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); + DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDrm: - DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); + DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -210,7 +360,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDYrm: - DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask); + DecodeUNPCKLMask(MVT::v4f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -218,14 +368,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPSrm: - DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); + DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPSrm: - DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); + DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -233,7 +383,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPSYrm: - DecodeUNPCKLPMask(MVT::v8f32, ShuffleMask); + DecodeUNPCKLMask(MVT::v8f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -241,14 +391,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPDrm: - DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); + DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKHPDrm: - DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); + DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -256,7 +406,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
case X86::VUNPCKHPDYrm: - DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask); + DecodeUNPCKHMask(MVT::v4f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -264,14 +414,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPSrm: - DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); + DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKHPSrm: - DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); + DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -279,34 +429,52 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKHPSYrm: - DecodeUNPCKHPMask(MVT::v8f32, ShuffleMask); + DecodeUNPCKHMask(MVT::v8f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPSri: - DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPERMILPSmi: + DecodeVPERMILPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPSYri: - DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPERMILPSYmi: + DecodeVPERMILPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPDri: - DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPERMILPDmi: + DecodeVPERMILPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPDYri: - DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPERMILPDYmi: + DecodeVPERMILPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERM2F128rr: - DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); + case X86::VPERM2I128rr: Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::VPERM2F128rm: + case X86::VPERM2I128rm: + DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; } diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt index 514566c..87305e0 100644 --- a/lib/Target/X86/LLVMBuild.txt +++ b/lib/Target/X86/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils + [component_0] type = TargetGroup name = X86 @@ -30,4 +33,3 @@ name = X86CodeGen parent = X86 required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils add_to_library_groups = X86 - diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 264e791..ab2ebb4 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -6,13 +6,6 @@ add_llvm_library(LLVMX86Desc X86MachObjectWriter.cpp ) -add_llvm_library_dependencies(LLVMX86Desc - LLVMMC - LLVMSupport - LLVMX86AsmPrinter - LLVMX86Info - ) - add_dependencies(LLVMX86Desc X86CommonTableGen) # Hack: we need to include 'main' target directory to grab private headers diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt index 3d09301..9e1d29c 100644 --- a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86Desc parent = X86 required_libraries = MC Support X86AsmPrinter X86Info add_to_library_groups = X86 - diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 69ad7d7..87b2b05 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -107,6 +107,11 @@ public: bool MayNeedRelaxation(const MCInst &Inst) const; + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; @@ -244,6 +249,14 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { return hasExp && !hasRIP; } +bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // Relax if the value is too big for a (signed) i8. + return int64_t(Value) != int64_t(int8_t(Value)); +} + // FIXME: Can tblgen help at all here to verify there aren't other instructions // we can relax? void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index c50f785..662ac1d 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -304,6 +304,12 @@ namespace X86II { // TAXD - Prefix before and after 0x0F. Combination of TA and XD. TAXD = 19 << Op0Shift, + // XOP8 - Prefix to include use of imm byte. + XOP8 = 20 << Op0Shift, + + // XOP9 - Prefix to exclude use of imm byte. + XOP9 = 21 << Op0Shift, + //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. 
// They are used to specify GPRs and SSE registers, 64-bit operand size, @@ -418,7 +424,16 @@ namespace X86II { /// storing a classifier in the imm8 field. To simplify our implementation, /// we handle this by storeing the classifier in the opcode field and using /// this flag to indicate that the encoder should do the wacky 3DNow! thing. - Has3DNow0F0FOpcode = 1U << 7 + Has3DNow0F0FOpcode = 1U << 7, + + /// XOP_W - Same bit as VEX_W. Used to indicate swapping of + /// operand 3 and 4 to be encoded in ModRM or I8IMM. This is used + /// for FMA4 and XOP instructions. + XOP_W = 1U << 8, + + /// XOP - Opcode prefix used by XOP instructions. + XOP = 1U << 9 + }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the @@ -488,9 +503,12 @@ namespace X86II { return 0; case X86II::MRMSrcMem: { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W; unsigned FirstMemOp = 1; if (HasVEX_4V) ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). + if (HasXOP_W) + ++FirstMemOp;// Skip the register source (which is encoded in I8IMM). // FIXME: Maybe lea should have its own form? This is a horrible hack. //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 2703100..eb64ad1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -125,7 +125,19 @@ getNonexecutableStackSection(MCContext &Ctx) const { 0, SectionKind::getMetadata()); } -X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { +X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { + if (Triple.getArch() == Triple::x86_64) { + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + } + + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; + + TextAlignFillValue = 0x90; +} + +X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { if (Triple.getArch() == Triple::x86_64) { GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h index 2cd4c8e..5d619e8 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h @@ -38,8 +38,12 @@ namespace llvm { virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; - struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { - explicit X86MCAsmInfoCOFF(const Triple &Triple); + struct X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { + explicit X86MCAsmInfoMicrosoft(const Triple &Triple); + }; + + struct X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF { + explicit X86MCAsmInfoGNUCOFF(const Triple &Triple); }; } // namespace llvm diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 1ab469c..8e14cb1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -169,23 +169,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) { return false; } -/// StartsWithGlobalOffsetTable - Return true for the simple cases where this -/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support -/// PIC on ELF i386 as that symbol is magic. 
We check only simple case that +/// StartsWithGlobalOffsetTable - Check if this expression starts with +/// _GLOBAL_OFFSET_TABLE_ and if it is of the form +/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF +/// i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only simple case that /// are know to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start /// of a binary expression. -static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) { +enum GlobalOffsetTableExprKind { + GOT_None, + GOT_Normal, + GOT_SymDiff +}; +static GlobalOffsetTableExprKind +StartsWithGlobalOffsetTable(const MCExpr *Expr) { + const MCExpr *RHS = 0; if (Expr->getKind() == MCExpr::Binary) { const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr); Expr = BE->getLHS(); + RHS = BE->getRHS(); } if (Expr->getKind() != MCExpr::SymbolRef) - return false; + return GOT_None; const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); const MCSymbol &S = Ref->getSymbol(); - return S.getName() == "_GLOBAL_OFFSET_TABLE_"; + if (S.getName() != "_GLOBAL_OFFSET_TABLE_") + return GOT_None; + if (RHS && RHS->getKind() == MCExpr::SymbolRef) + return GOT_SymDiff; + return GOT_Normal; } void X86MCCodeEmitter:: @@ -209,12 +222,15 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, // If we have an immoffset, add it to the expression. if ((FixupKind == FK_Data_4 || - FixupKind == MCFixupKind(X86::reloc_signed_4byte)) && - StartsWithGlobalOffsetTable(Expr)) { - assert(ImmOffset == 0); - - FixupKind = MCFixupKind(X86::reloc_global_offset_table); - ImmOffset = CurByte; + FixupKind == MCFixupKind(X86::reloc_signed_4byte))) { + GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr); + if (Kind != GOT_None) { + assert(ImmOffset == 0); + + FixupKind = MCFixupKind(X86::reloc_global_offset_table); + if (Kind == GOT_Normal) + ImmOffset = CurByte; + } } // If the fixup is pc-relative, we need to bias the value to be relative to @@ -415,6 +431,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // opcode extension, or ignored, depending on the opcode byte) unsigned char VEX_W = 0; + // XOP_W: opcode specific, same bit as VEX_W, but used to + // swap operand 3 and 4 for FMA4 and XOP instructions + unsigned char XOP_W = 0; + + // XOP: Use XOP prefix byte 0x8f instead of VEX. 
+ unsigned char XOP = 0; + // VEX_5M (VEX m-mmmmm field): // // 0b00000: Reserved for future use @@ -422,7 +445,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // 0b00010: implied 0F 38 leading opcode bytes // 0b00011: implied 0F 3A leading opcode bytes // 0b00100-0b11111: Reserved for future use - // + // 0b01000: XOP map select - 08h instructions with imm byte + // 0b10001: XOP map select - 09h instructions with no imm byte unsigned char VEX_5M = 0x1; // VEX_4V (VEX vvvv field): a register specifier @@ -453,6 +477,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) VEX_W = 1; + if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W) + XOP_W = 1; + + if ((TSFlags >> X86II::VEXShift) & X86II::XOP) + XOP = 1; + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) VEX_L = 1; @@ -482,6 +512,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::XD: // F2 0F VEX_PP = 0x3; break; + case X86II::XOP8: + VEX_5M = 0x8; + break; + case X86II::XOP9: + VEX_5M = 0x9; + break; case X86II::A6: // Bypass: Not used by VEX case X86II::A7: // Bypass: Not used by VEX case X86II::TB: // Bypass: Not used by VEX @@ -489,6 +525,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, break; // No prefix! } + // Set the vector length to 256-bit if YMM0-YMM15 is used for (unsigned i = 0; i != MI.getNumOperands(); ++i) { if (!MI.getOperand(i).isReg()) @@ -529,6 +566,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // src1(ModR/M), MemAddr, imm8 // src1(ModR/M), MemAddr, src2(VEX_I8IMM) // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) VEX_R = 0x0; @@ -620,16 +660,16 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); - if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix + if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix EmitByte(0xC5, CurByte, OS); EmitByte(LastByte | (VEX_R << 7), CurByte, OS); return; } // 3 byte VEX prefix - EmitByte(0xC4, CurByte, OS); + EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS); EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS); - EmitByte(LastByte | (VEX_W << 7), CurByte, OS); + EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS); } /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 @@ -889,6 +929,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // It uses the VEX.VVVV field? bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W; + unsigned XOP_W_I8IMMOperand = 2; // Determine where the memory operand starts, if present. 
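An aside on the prefix-emission hunk above: once XOP is in the picture, the escape byte and the W bit are chosen roughly as sketched below. This is a minimal, self-contained illustration in plain C++, not the LLVM emitter; the helper name emitPrefixSketch and the sample values in main are invented, and the inverted polarity of the real R/X/B bits is glossed over.

// Standalone sketch of the 2-byte VEX / 3-byte VEX / XOP escape selection in
// the hunk above; illustrative only, not the LLVM encoder.
#include <cstdint>
#include <cstdio>
#include <vector>

static void emitPrefixSketch(bool VEX_R, bool VEX_X, bool VEX_B, bool VEX_W,
                             bool XOP, bool XOP_W, uint8_t VEX_5M,
                             uint8_t LastByte, std::vector<uint8_t> &Out) {
  // The compact 2-byte form (C5) is only usable for plain VEX with the
  // default 0F opcode map and no W override; XOP always takes the long form.
  if (VEX_B && VEX_X && !VEX_W && !XOP && VEX_5M == 1) {
    Out.push_back(0xC5);
    Out.push_back(LastByte | (VEX_R << 7));
    return;
  }
  // 3-byte form: escape byte 0x8F for XOP, 0xC4 for VEX, then the map byte,
  // then the byte carrying W (or XOP_W), vvvv, L and pp.
  Out.push_back(XOP ? 0x8F : 0xC4);
  Out.push_back((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | VEX_5M);
  Out.push_back(LastByte | ((VEX_W | XOP_W) << 7));
}

int main() {
  std::vector<uint8_t> Bytes;
  // An XOP map-9 instruction is forced into the 3-byte form with escape 0x8F.
  emitPrefixSketch(true, true, true, false, /*XOP=*/true, /*XOP_W=*/false,
                   /*VEX_5M=*/0x9, /*LastByte=*/0x48, Bytes);
  for (uint8_t B : Bytes)
    std::printf("%02x ", B);
  std::printf("\n");
  return 0;
}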
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); @@ -961,9 +1003,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) SrcRegNum++; + if(HasXOP_W) // Skip 2nd src (which is encoded in I8IMM) + SrcRegNum++; + EmitRegModRMByte(MI.getOperand(SrcRegNum), GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); - CurOp = SrcRegNum + 1; + + // 2 operands skipped with HasXOP_W, comensate accordingly + CurOp = HasXOP_W ? SrcRegNum : SrcRegNum + 1; if (HasVEX_4VOp3) ++CurOp; break; @@ -975,6 +1022,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, ++AddrOperands; ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). } + if(HasXOP_W) // Skip second register source (encoded in I8IMM) + ++FirstMemOp; EmitByte(BaseOpcode, CurByte, OS); @@ -1062,12 +1111,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // according to the right size for the instruction. if (CurOp != NumOps) { // The last source register of a 4 operand instruction in AVX is encoded - // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. + // in bits[7:4] of a immediate byte. if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { - const MCOperand &MO = MI.getOperand(CurOp++); + const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand + : CurOp); + CurOp++; bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg()); unsigned RegNum = (IsExtReg ? (1 << 7) : 0); RegNum |= GetX86RegNum(MO) << 4; + // If there is an additional 5th operand it must be an immediate, which + // is encoded in bits[3:0] + if(CurOp != NumOps) { + const MCOperand &MIMM = MI.getOperand(CurOp++); + if(MIMM.isImm()) { + unsigned Val = MIMM.getImm(); + assert(Val < 16 && "Immediate operand value out of range"); + RegNum |= Val; + } + } EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, Fixups); } else { diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index a843515..f2a34ed 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -361,8 +361,10 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { MAI = new X86_64MCAsmInfoDarwin(TheTriple); else MAI = new X86MCAsmInfoDarwin(TheTriple); - } else if (TheTriple.isOSWindows()) { - MAI = new X86MCAsmInfoCOFF(TheTriple); + } else if (TheTriple.getOS() == Triple::Win32) { + MAI = new X86MCAsmInfoMicrosoft(TheTriple); + } else if (TheTriple.getOS() == Triple::MinGW32 || TheTriple.getOS() == Triple::Cygwin) { + MAI = new X86MCAsmInfoGNUCOFF(TheTriple); } else { MAI = new X86ELFMCAsmInfo(TheTriple); } diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 7d901af..a581993 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -922,16 +922,3 @@ _test2: ## @test2 The insertps's of $0 are pointless complex copies. //===---------------------------------------------------------------------===// - -If SSE4.1 is available we should inline rounding functions instead of emitting -a libcall. - -floor: roundsd $0x01, %xmm, %xmm -ceil: roundsd $0x02, %xmm, %xmm - -and likewise for the single precision versions. - -Currently, SelectionDAGBuilder doesn't turn calls to these functions into the -corresponding nodes and some targets (including X86) aren't ready for them. 
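Back to the VEX_I8IMM change in X86MCCodeEmitter.cpp above: the extra source register lands in bits[7:4] of the trailing imm8 (bit 7 marking an x86-64 extended register) and, when a fifth operand is present, its small immediate fills bits[3:0]. A hedged standalone sketch of that packing follows; packI8Imm is an invented helper name, not an LLVM API.

// Standalone sketch of the imm8 packing described above: the extra source
// register occupies bits[7:4] (bit 7 flagging an x86-64 extended register),
// and an optional trailing 4-bit immediate occupies bits[3:0].
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint8_t packI8Imm(unsigned RegNum, bool IsExtReg, int Imm /* -1 = none */) {
  assert(RegNum < 8 && "only the low three bits of the register number");
  unsigned Byte = (IsExtReg ? 0x80u : 0x00u) | (RegNum << 4);
  if (Imm >= 0) {
    assert(Imm < 16 && "Immediate operand value out of range");
    Byte |= static_cast<unsigned>(Imm);
  }
  return static_cast<uint8_t>(Byte);
}

int main() {
  // e.g. register number 3 as the extra source plus a 4-bit immediate of 5.
  std::printf("%02x\n", static_cast<unsigned>(packI8Imm(3, false, 5)));
  return 0;
}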
- -//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt index 4da00fa..b1d0b9f 100644 --- a/lib/Target/X86/TargetInfo/CMakeLists.txt +++ b/lib/Target/X86/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMX86Info X86TargetInfo.cpp ) -add_llvm_library_dependencies(LLVMX86Info - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMX86Info X86CommonTableGen) diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt index ee015bd..3c64a22 100644 --- a/lib/Target/X86/TargetInfo/LLVMBuild.txt +++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86Info parent = X86 required_libraries = MC Support Target add_to_library_groups = X86 - diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt index caffd8b..2e72c34 100644 --- a/lib/Target/X86/Utils/CMakeLists.txt +++ b/lib/Target/X86/Utils/CMakeLists.txt @@ -4,9 +4,4 @@ add_llvm_library(LLVMX86Utils X86ShuffleDecode.cpp ) -add_llvm_library_dependencies(LLVMX86Utils - LLVMCore - LLVMSupport - ) - add_dependencies(LLVMX86Utils X86CommonTableGen) diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt index 3ee441e..de0a30f 100644 --- a/lib/Target/X86/Utils/LLVMBuild.txt +++ b/lib/Target/X86/Utils/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86Utils parent = X86 required_libraries = Core Support add_to_library_groups = X86 - diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index f6c9d7b..e7631b6 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -95,54 +95,31 @@ void DecodePSHUFLWMask(unsigned Imm, ShuffleMask.push_back(7); } -void DecodePUNPCKLBWMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask); -} - -void DecodePUNPCKLWDMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask); -} - -void DecodePUNPCKLDQMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); -} - -void DecodePUNPCKLQDQMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); -} - -void DecodePUNPCKLMask(EVT VT, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(VT, ShuffleMask); -} +void DecodeSHUFPMask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); -void DecodePUNPCKHMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i+NElts/2); - ShuffleMask.push_back(i+NElts+NElts/2); - } -} + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; -void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - // Part that reads from dest. - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(Imm % NElts); - Imm /= NElts; - } - // Part that reads from src. - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(Imm % NElts + NElts); - Imm /= NElts; + int NewImm = Imm; + for (unsigned l = 0; l < NumLanes; ++l) { + unsigned LaneStart = l * NumLaneElts; + // Part that reads from dest. 
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) { + ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart); + NewImm /= NumLaneElts; + } + // Part that reads from src. + for (unsigned i = 0; i != NumLaneElts/2; ++i) { + ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart); + NewImm /= NumLaneElts; + } + if (NumLaneElts == 4) NewImm = Imm; // reload imm } } -void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -161,10 +138,10 @@ void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { } } -/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd +/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -183,36 +160,23 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { } } -// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes and the mask of the first lane must -// be the same of the second. -void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - unsigned NumLanes = (NumElts*32)/128; - unsigned LaneSize = NumElts/NumLanes; - - for (unsigned l = 0; l != NumLanes; ++l) { - for (unsigned i = 0; i != LaneSize; ++i) { - unsigned Idx = (Imm >> (i*2)) & 0x3 ; - ShuffleMask.push_back(Idx+(l*LaneSize)); - } - } -} +// DecodeVPERMILPMask - Decodes VPERMILPS/ VPERMILPD permutes for any 128-bit +// 32-bit or 64-bit elements. For 256-bit vectors, it's considered as two 128 +// lanes. For VPERMILPS, referenced elements can't cross lanes and the mask of +// the first lane must be the same of the second. +void DecodeVPERMILPMask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); -// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes but the mask of the first lane can -// be the different of the second (not like VPERMILPS). -void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - unsigned NumLanes = (NumElts*64)/128; - unsigned LaneSize = NumElts/NumLanes; + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; - for (unsigned l = 0; l < NumLanes; ++l) { - for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) { - unsigned Idx = (Imm >> i) & 0x1; - ShuffleMask.push_back(Idx+(l*LaneSize)); + for (unsigned l = 0; l != NumLanes; ++l) { + unsigned LaneStart = l*NumLaneElts; + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Idx = NumLaneElts == 4 ? 
(Imm >> (i*2)) & 0x3 + : (Imm >> (i+LaneStart)) & 0x1; + ShuffleMask.push_back(Idx+LaneStart); } } } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 35f6530..243728f 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -46,50 +46,25 @@ void DecodePSHUFHWMask(unsigned Imm, void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); -void DecodePUNPCKLBWMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKLWDMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKLDQMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKLQDQMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKLMask(EVT VT, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKHMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeSHUFPMask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); -/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd +/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); -/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd +/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); - +void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); -// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes and the mask of the first lane must -// be the same of the second. -void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); -// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes but the mask of the first lane can -// be the different of the second (not like VPERMILPS). -void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm, +// DecodeVPERMILPMask - Decodes VPERMILPS/ VPERMILPD permutes for any 128-bit +// 32-bit or 64-bit elements. For 256-bit vectors, it's considered as two 128 +// lanes. For VPERMILPS, referenced elements can't cross lanes and the mask of +// the first lane must be the same of the second. 
+void DecodeVPERMILPMask(EVT VT, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); void DecodeVPERM2F128Mask(unsigned Imm, diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 62a7016..8229ca5 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -91,6 +91,8 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", "Enable three-operand fused multiple-add">; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add">; +def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", + "Enable XOP instructions">; def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "HasVectorUAMem", "true", "Allow unaligned memory operands on vector/SIMD instructions">; @@ -194,14 +196,16 @@ def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, - Feature3DNowA, FeatureCMPXCHG16B, + Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, FeatureSlowBTMem]>; -def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, - Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; -def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B, - FeatureSSE4A]>; -def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A]>; +// FIXME: Disabling AVX for now since it's not ready. +def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B, + FeatureAES, FeatureCLMUL, FeatureFMA4, + FeatureXOP, FeatureLZCNT]>; +def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B, + FeatureAES, FeatureCLMUL, FeatureFMA4, + FeatureXOP, FeatureF16C, FeatureLZCNT, + FeatureBMI]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 77b9905..aab2a05 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -158,10 +158,15 @@ def CC_X86_64_C : CallingConv<[ CCIfSubtarget<"hasXMM()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - // The first 8 256-bit vector arguments are passed in YMM registers. - CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], - CCIfSubtarget<"hasAVX()", - CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>, + // The first 8 256-bit vector arguments are passed in YMM registers, unless + // this is a vararg function. + // FIXME: This isn't precisely correct; the x86-64 ABI document says that + // fixed arguments to vararg functions are supposed to be passed in + // registers. Actually modeling that would be a lot of work, though. + CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", + CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, + YMM4, YMM5, YMM6, YMM7]>>>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. 
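Looking back at the X86ShuffleDecode.cpp rewrite above: the new decoders walk a 256-bit vector as two independent 128-bit lanes, so one routine covers both the SSE and the AVX (Y) forms. Below is a minimal standalone sketch of the SHUFP-style decode under that assumption; it is plain C++, not the LLVM helper, and decodeShufpSketch is an invented name.

// Lane-wise SHUFP mask decode as sketched from the patch above: within each
// 128-bit lane, the low half of the result reads from the first source and
// the high half from the second (offset by NumElts), and 4-element lanes
// re-read the 8-bit immediate per lane.
#include <cstdio>
#include <vector>

static std::vector<unsigned> decodeShufpSketch(unsigned NumElts,
                                               unsigned NumLanes,
                                               unsigned Imm) {
  std::vector<unsigned> Mask;
  unsigned NumLaneElts = NumElts / NumLanes;
  unsigned NewImm = Imm;
  for (unsigned l = 0; l != NumLanes; ++l) {
    unsigned LaneStart = l * NumLaneElts;
    // Elements taken from the first source operand.
    for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
      Mask.push_back(NewImm % NumLaneElts + LaneStart);
      NewImm /= NumLaneElts;
    }
    // Elements taken from the second source operand.
    for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
      Mask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
      NewImm /= NumLaneElts;
    }
    if (NumLaneElts == 4)
      NewImm = Imm; // reload the immediate for the next 4-element lane
  }
  return Mask;
}

int main() {
  // v8f32-style decode (two lanes of four elements) with immediate 0x1B.
  for (unsigned Idx : decodeShufpSketch(8, 2, 0x1B))
    std::printf("%u ", Idx);
  std::printf("\n");
  return 0;
}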
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ba615a8..ed16e88 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -1004,7 +1004,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, break; } - if (!Desc->isVariadic() && CurOp != NumOps) { + if (!MI.isVariadic() && CurOp != NumOps) { #ifndef NDEBUG dbgs() << "Cannot encode all operands of: " << MI << "\n"; #endif diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 32f1770..1589439 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -728,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && GuaranteedTailCallOpt) + if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) return false; // Let SDISel handle vararg functions. @@ -1529,7 +1529,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && GuaranteedTailCallOpt) + if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) return false; PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); @@ -1543,7 +1543,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Fast-isel doesn't know about callee-pop yet. if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg, - GuaranteedTailCallOpt)) + TM.Options.GuaranteedTailCallOpt)) return false; // Check whether the function can return without sret-demotion. @@ -2121,7 +2121,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { default: return false; case MVT::f32: if (X86ScalarSSEf32) { - Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS; + Opc = X86::FsFLD0SS; RC = X86::FR32RegisterClass; } else { Opc = X86::LD_Fp032; @@ -2130,7 +2130,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { break; case MVT::f64: if (X86ScalarSSEf64) { - Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD; + Opc = X86::FsFLD0SD; RC = X86::FR64RegisterClass; } else { Opc = X86::LD_Fp064; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 819d242..6a40cc1 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -47,7 +47,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { const MachineModuleInfo &MMI = MF.getMMI(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); - return (DisableFramePointerElim(MF) || + return (MF.getTarget().Options.DisableFramePointerElim(MF) || RI->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || @@ -210,7 +210,7 @@ static void mergeSPUpdatesDown(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, uint64_t *NumBytes = NULL) { - // FIXME: THIS ISN'T RUN!!! + // FIXME: THIS ISN'T RUN!!! return; if (MBBI == MBB.end()) return; @@ -351,20 +351,22 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, /// register. The number corresponds to the enum lists in /// compact_unwind_encoding.h. 
static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) { - int Idx = 1; - for (; *CURegs; ++CURegs, ++Idx) + for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; return -1; } +// Number of registers that can be saved in a compact unwind encoding. +#define CU_NUM_SAVED_REGS 6 + /// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding /// used with frameless stacks. It is passed the number of registers to be saved /// and an array of the registers saved. -static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6], - unsigned RegCount, - bool Is64Bit) { +static uint32_t +encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], + unsigned RegCount, bool Is64Bit) { // The saved registers are numbered from 1 to 6. In order to encode the order // in which they were saved, we re-number them according to their place in the // register order. The re-numbering is relative to the last re-numbered @@ -385,14 +387,21 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6], }; const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); - uint32_t RenumRegs[6]; - for (unsigned i = 6 - RegCount; i < 6; ++i) { + for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) { int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]); if (CUReg == -1) return ~0U; SavedRegs[i] = CUReg; + } + + // Reverse the list. + std::swap(SavedRegs[0], SavedRegs[5]); + std::swap(SavedRegs[1], SavedRegs[4]); + std::swap(SavedRegs[2], SavedRegs[3]); + uint32_t RenumRegs[CU_NUM_SAVED_REGS]; + for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) { unsigned Countless = 0; - for (unsigned j = 6 - RegCount; j < i; ++j) + for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) if (SavedRegs[j] < SavedRegs[i]) ++Countless; @@ -435,8 +444,9 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6], /// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a /// compact encoding with a frame pointer. -static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6], - bool Is64Bit) { +static uint32_t +encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], + bool Is64Bit) { static const unsigned CU32BitRegs[] = { X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 }; @@ -448,13 +458,16 @@ static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6], // Encode the registers in the order they were saved, 3-bits per register. The // registers are numbered from 1 to 6. uint32_t RegEnc = 0; - for (int I = 5; I >= 0; --I) { + for (int I = 0; I != 6; --I) { unsigned Reg = SavedRegs[I]; if (Reg == 0) break; int CURegNum = getCompactUnwindRegNum(CURegs, Reg); if (CURegNum == -1) return ~0U; - RegEnc |= (CURegNum & 0x7) << (5 - I); + + // Encode the 3-bit register number in order, skipping over 3-bits for each + // register. 
+ RegEnc |= (CURegNum & 0x7) << ((5 - I) * 3); } assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!"); @@ -466,14 +479,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - bool Is64Bit = STI.is64Bit(); bool HasFP = hasFP(MF); - unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; - int SavedRegIdx = 6; + unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 }; + unsigned SavedRegIdx = 0; unsigned OffsetSize = (Is64Bit ? 8 : 4); @@ -481,14 +491,13 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { unsigned PushInstrSize = 1; unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr); unsigned MoveInstrSize = (Is64Bit ? 3 : 2); - unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta); unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2); unsigned StackDivide = (Is64Bit ? 8 : 4); unsigned InstrOffset = 0; - unsigned CFAOffset = 0; unsigned StackAdjust = 0; + unsigned StackSize = 0; MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB. bool ExpectEnd = false; @@ -504,10 +513,10 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (Opc == PushInstr) { // If there are too many saved registers, we cannot use compact encoding. - if (--SavedRegIdx < 0) return 0; + if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0; - SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg(); - CFAOffset += OffsetSize; + SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg(); + StackAdjust += OffsetSize; InstrOffset += PushInstrSize; } else if (Opc == MoveInstr) { unsigned SrcReg = MI.getOperand(1).getReg(); @@ -516,13 +525,14 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (DstReg != FramePtr || SrcReg != StackPtr) return 0; - CFAOffset = 0; + StackAdjust = 0; memset(SavedRegs, 0, sizeof(SavedRegs)); - SavedRegIdx = 6; + SavedRegIdx = 0; InstrOffset += MoveInstrSize; - } else if (Opc == SubtractInstr) { - if (StackAdjust) - // We all ready have a stack pointer adjustment. + } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) { + if (StackSize) + // We already have a stack size. return 0; if (!MI.getOperand(0).isReg() || @@ -533,7 +543,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { // %RSP<def> = SUB64ri8 %RSP, 48 return 0; - StackAdjust = MI.getOperand(2).getImm() / StackDivide; + StackSize = MI.getOperand(2).getImm() / StackDivide; SubtractInstrIdx += InstrOffset; ExpectEnd = true; } @@ -541,28 +551,30 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { // Encode that we are using EBP/RBP as the frame pointer. uint32_t CompactUnwindEncoding = 0; - CFAOffset /= StackDivide; + StackAdjust /= StackDivide; if (HasFP) { - if ((CFAOffset & 0xFF) != CFAOffset) + if ((StackAdjust & 0xFF) != StackAdjust) // Offset was too big for compact encoding. return 0; // Get the encoding of the saved registers when we have a frame pointer. 
uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); - if (RegEnc == ~0U) - return 0; + if (RegEnc == ~0U) return 0; CompactUnwindEncoding |= 0x01000000; - CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16; + CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; CompactUnwindEncoding |= RegEnc & 0x7FFF; } else { - unsigned FullOffset = CFAOffset + StackAdjust; - if ((FullOffset & 0xFF) == FullOffset) { - // Frameless stack. + ++StackAdjust; + uint32_t TotalStackSize = StackAdjust + StackSize; + if ((TotalStackSize & 0xFF) == TotalStackSize) { + // Frameless stack with a small stack size. CompactUnwindEncoding |= 0x02000000; - CompactUnwindEncoding |= (FullOffset & 0xFF) << 16; + + // Encode the stack size. + CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16; } else { - if ((CFAOffset & 0x7) != CFAOffset) + if ((StackAdjust & 0x7) != StackAdjust) // The extra stack adjustments are too big for us to handle. return 0; @@ -573,16 +585,21 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { // instruction. CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; - // Encode any extra stack stack changes (done via push instructions). - CompactUnwindEncoding |= (CFAOffset & 0x7) << 13; + // Encode any extra stack stack adjustments (done via push instructions). + CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; } + // Encode the number of registers saved. + CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; + // Get the encoding of the saved registers when we don't have a frame // pointer. - uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs, - 6 - SavedRegIdx, - Is64Bit); + uint32_t RegEnc = + encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx, + Is64Bit); if (RegEnc == ~0U) return 0; + + // Encode the register encoding. CompactUnwindEncoding |= RegEnc & 0x3FF; } @@ -638,10 +655,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // stack pointer (we fit in the Red Zone). if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !RegInfo->needsStackRealignment(MF) && - !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // No calls. - !IsWin64 && // Win64 has no Red Zone - !EnableSegmentedStacks) { // Regular stack + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->adjustsStack() && // No calls. + !IsWin64 && // Win64 has no Red Zone + !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); @@ -978,7 +995,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, unsigned Opc = PI->getOpcode(); if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && - !PI->getDesc().isTerminator()) + !PI->isTerminator()) break; --MBBI; @@ -1306,6 +1323,10 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF) { } } +// The stack limit in the TCB is set to this many bytes above the actual stack +// limit. 
+static const uint64_t kSplitStackAvailable = 256; + void X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MachineBasicBlock &prologueMBB = MF.front(); @@ -1360,16 +1381,24 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { TlsReg = X86::FS; TlsOffset = 0x70; - BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) - .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + if (StackSize < kSplitStackAvailable) + ScratchReg = X86::RSP; + else + BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); } else { TlsReg = X86::GS; TlsOffset = 0x30; - BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) - .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + if (StackSize < kSplitStackAvailable) + ScratchReg = X86::ESP; + else + BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); } @@ -1394,9 +1423,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.getRegInfo().setPhysRegUsed(X86::R10); MF.getRegInfo().setPhysRegUsed(X86::R11); } else { - // Since we'll call __morestack, stack alignment needs to be preserved. - BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP) - .addImm(8); BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) .addImm(X86FI->getArgumentStackSize()); BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) @@ -1411,11 +1437,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) .addExternalSymbol("__morestack"); - // __morestack only seems to remove 8 bytes off the stack. Add back the - // additional 8 bytes we added before pushing the arguments. - if (!Is64Bit) - BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP) - .addImm(8); if (IsNested) BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10)); else diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 96c6f41..03727a2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -256,7 +256,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->is64Bit()) { setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); - } else if (!UseSoftFloat) { + } else if (!TM.Options.UseSoftFloat) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. 
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); @@ -270,7 +270,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); - if (!UseSoftFloat) { + if (!TM.Options.UseSoftFloat) { // SSE has no i16 to fp conversion, only i32 if (X86ScalarSSEf32) { setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); @@ -313,7 +313,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->is64Bit()) { setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); - } else if (!UseSoftFloat) { + } else if (!TM.Options.UseSoftFloat) { // Since AVX is a superset of SSE3, only check for SSE here. if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) // Expand FP_TO_UINT into a select. @@ -378,6 +378,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i16 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i64 , Expand); if (Subtarget->hasBMI()) { setOperationAction(ISD::CTTZ , MVT::i8 , Promote); } else { @@ -388,6 +392,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::CTTZ , MVT::i64 , Custom); } + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i8 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i16 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i64 , Expand); if (Subtarget->hasLZCNT()) { setOperationAction(ISD::CTLZ , MVT::i8 , Promote); } else { @@ -537,14 +545,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? MVT::i64 : MVT::i32, Custom); - else if (EnableSegmentedStacks) + else if (TM.Options.EnableSegmentedStacks) setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? MVT::i64 : MVT::i32, Custom); else setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? MVT::i64 : MVT::i32, Expand); - if (!UseSoftFloat && X86ScalarSSEf64) { + if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) { // f32 and f64 use SSE. // Set up the FP register classes. addRegisterClass(MVT::f32, X86::FR32RegisterClass); @@ -576,7 +584,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // cases we handle. addLegalFPImmediate(APFloat(+0.0)); // xorpd addLegalFPImmediate(APFloat(+0.0f)); // xorps - } else if (!UseSoftFloat && X86ScalarSSEf32) { + } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) { // Use SSE for f32, x87 for f64. // Set up the FP register classes. addRegisterClass(MVT::f32, X86::FR32RegisterClass); @@ -605,11 +613,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS - if (!UnsafeFPMath) { + if (!TM.Options.UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f64 , Expand); } - } else if (!UseSoftFloat) { + } else if (!TM.Options.UseSoftFloat) { // f32 and f64 in x87. // Set up the FP register classes. 
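Back at the top of this hunk, the new CTTZ_ZERO_UNDEF and CTLZ_ZERO_UNDEF entries cover the count-zero variants whose result is undefined for a zero input; marking them Expand should simply let legalization fall back to the plain CTTZ/CTLZ handling chosen right after, while BMI's TZCNT and LZCNT make even the zero input well defined in hardware. In source-level terms the difference in contract is roughly:

    #include <cstdint>

    // cttz_zero_undef only needs to agree with cttz for non-zero inputs,
    // which is exactly the contract of the GCC/Clang builtin.
    unsigned cttzZeroUndef(uint64_t x) {
      return static_cast<unsigned>(__builtin_ctzll(x));            // caller promises x != 0
    }

    unsigned cttz(uint64_t x) {
      return x ? static_cast<unsigned>(__builtin_ctzll(x)) : 64;   // defined for 0 as well
    }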
addRegisterClass(MVT::f64, X86::RFP64RegisterClass); @@ -620,7 +628,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - if (!UnsafeFPMath) { + if (!TM.Options.UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f64 , Expand); } @@ -639,7 +647,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FMA, MVT::f32, Expand); // Long double always uses X87. - if (!UseSoftFloat) { + if (!TM.Options.UseSoftFloat) { addRegisterClass(MVT::f80, X86::RFP80RegisterClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -658,11 +666,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(TmpFlt2); // FLD1/FCHS } - if (!UnsafeFPMath) { + if (!TM.Options.UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f80 , Expand); setOperationAction(ISD::FCOS , MVT::f80 , Expand); } + setOperationAction(ISD::FFLOOR, MVT::f80, Expand); + setOperationAction(ISD::FCEIL, MVT::f80, Expand); + setOperationAction(ISD::FTRUNC, MVT::f80, Expand); + setOperationAction(ISD::FRINT, MVT::f80, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); setOperationAction(ISD::FMA, MVT::f80, Expand); } @@ -714,7 +727,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand); @@ -748,7 +763,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. - if (!UseSoftFloat && Subtarget->hasMMX()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) { addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass); // No operations on x86mmx supported, everything uses intrinsics. 
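Most of the mechanical churn in these X86ISelLowering hunks is one refactor: flags such as UseSoftFloat, UnsafeFPMath, GuaranteedTailCallOpt and EnableSegmentedStacks stop being free-standing command-line globals and become members of an options struct hung off the target machine, read as TM.Options.<flag> in the constructor and as getTargetMachine().Options.<flag> or DAG.getTarget().Options.<flag> elsewhere. A minimal sketch of the shape of that change (the struct and function names below are illustrative, not the LLVM types):

    // Before: each flag was a mutable global any file could read.
    // After: the flags travel with the TargetMachine that owns the compilation.
    struct OptionsSketch {
      bool UnsafeFPMath;
      bool UseSoftFloat;
      bool GuaranteedTailCallOpt;
      bool EnableSegmentedStacks;
    };

    struct TargetMachineSketch {
      OptionsSketch Options;
    };

    bool fpExceptionsPreserved(const TargetMachineSketch &TM) {
      return !TM.Options.UnsafeFPMath;   // was: if (!UnsafeFPMath)
    }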
} @@ -785,7 +800,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BITCAST, MVT::v2i32, Expand); setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); - if (!UseSoftFloat && Subtarget->hasXMM()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasXMM()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); @@ -802,7 +817,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::v4f32, Custom); } - if (!UseSoftFloat && Subtarget->hasXMMInt()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasXMMInt()) { addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM @@ -983,7 +998,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSSE42orAVX()) setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - if (!UseSoftFloat && Subtarget->hasAVX()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) { addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); addRegisterClass(MVT::v16i16, X86::VR256RegisterClass); addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); @@ -1211,10 +1226,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; - setPrefLoopAlignment(16); + setPrefLoopAlignment(4); // 2^4 bytes. benefitFromCodePlacementOpt = true; - setPrefFunctionAlignment(4); + setPrefFunctionAlignment(4); // 2^4 bytes. } @@ -1709,7 +1724,8 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { /// FuncIsMadeTailCallSafe - Return true if the function is being made into /// a tailcall target by changing its ABI. -static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) { +static bool FuncIsMadeTailCallSafe(CallingConv::ID CC, + bool GuaranteedTailCallOpt) { return GuaranteedTailCallOpt && IsTailCallConvention(CC); } @@ -1723,7 +1739,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; - bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv); + bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv, + getTargetMachine().Options.GuaranteedTailCallOpt); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; @@ -1873,7 +1890,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned StackSize = CCInfo.getNextStackOffset(); // Align stack specially for tail calls. 
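The loop-alignment tweak a little further up in this hunk is worth a second look, because it is not a policy change: setPrefLoopAlignment and setPrefFunctionAlignment take a log2 value (hence the new "// 2^4 bytes." comments), so the old setPrefLoopAlignment(16) was presumably a leftover from a byte-valued interface and effectively requested 2^16-byte alignment, while the new argument of 4 asks for the intended 16 bytes and matches the function alignment beside it. In numbers:

    // Alignment setters take log2(bytes):
    //   setPrefLoopAlignment(4)  requests 1u << 4  == 16-byte alignment
    //   setPrefLoopAlignment(16) requests 1u << 16 == 65536-byte alignment
    constexpr unsigned kPrefLoopAlignBytes = 1u << 4;
    static_assert(kPrefLoopAlignBytes == 16, "log2 convention");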
- if (FuncIsMadeTailCallSafe(CallConv)) + if (FuncIsMadeTailCallSafe(CallConv, + MF.getTarget().Options.GuaranteedTailCallOpt)) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for @@ -1918,9 +1936,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); assert(!(NumXMMRegs && !Subtarget->hasXMM()) && "SSE register cannot be used when SSE is disabled!"); - assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) && + assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && + NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); - if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM()) + if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps || + !Subtarget->hasXMM()) // Kernel mode asks for SSE to be disabled, so don't push them // on the stack. TotalNumXMMRegs = 0; @@ -1998,7 +2018,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } // Some CCs need callee pop. - if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) { + if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, + MF.getTarget().Options.GuaranteedTailCallOpt)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. @@ -2098,7 +2119,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Sibcalls are automatically detected tailcalls which do not require // ABI changes. - if (!GuaranteedTailCallOpt && isTailCall) + if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall) IsSibcall = true; if (isTailCall) @@ -2126,7 +2147,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // This is a sibcall. The memory operands are available in caller's // own caller's stack. NumBytes = 0; - else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv)) + else if (getTargetMachine().Options.GuaranteedTailCallOpt && + IsTailCallConvention(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; @@ -2305,7 +2327,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, int FI = 0; // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); - if (GuaranteedTailCallOpt) { + if (getTargetMachine().Options.GuaranteedTailCallOpt) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) @@ -2485,7 +2507,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the CALLSEQ_END node. 
unsigned NumBytesForCalleeToPush; - if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) + if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, + getTargetMachine().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet) // If this is a call to a struct-return function, the callee @@ -2643,7 +2666,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; - if (GuaranteedTailCallOpt) { + if (getTargetMachine().Options.GuaranteedTailCallOpt) { if (IsTailCallConvention(CalleeCC) && CCMatch) return true; return false; @@ -2843,23 +2866,10 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: - case X86ISD::VPERMILPS: - case X86ISD::VPERMILPSY: - case X86ISD::VPERMILPD: - case X86ISD::VPERMILPDY: - case X86ISD::VPERM2F128: + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: + case X86ISD::VPERMILP: + case X86ISD::VPERM2X128: return true; } return false; @@ -2885,10 +2895,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: - case X86ISD::VPERMILPS: - case X86ISD::VPERMILPSY: - case X86ISD::VPERMILPD: - case X86ISD::VPERMILPDY: + case X86ISD::VPERMILP: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); } @@ -2902,7 +2909,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PALIGN: case X86ISD::SHUFPD: case X86ISD::SHUFPS: - case X86ISD::VPERM2F128: + case X86ISD::VPERM2X128: return DAG.getNode(Opc, dl, VT, V1, V2, DAG.getConstant(TargetMask, MVT::i8)); } @@ -2920,18 +2927,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVLPD: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: return DAG.getNode(Opc, dl, VT, V1, V2); } return SDValue(); @@ -3231,7 +3228,7 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, bool hasSSSE3OrAVX) { int i, e = VT.getVectorNumElements(); - if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64) + if (VT.getSizeInBits() != 128) return false; // Do not handle v2i64 / v2f64 shuffles with palignr. @@ -3261,17 +3258,17 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand +/// isVSHUFPYMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 256-bit /// VSHUFPSY. 
-static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { +static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool HasAVX, bool Commuted = false) { int NumElems = VT.getVectorNumElements(); - if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) + if (!HasAVX || VT.getSizeInBits() != 256) return false; - if (NumElems != 8) + if (NumElems != 4 && NumElems != 8) return false; // VSHUFPSY divides the resulting vector into 4 chunks. @@ -3284,124 +3281,63 @@ static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT, // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4, // Y3..Y0, Y3..Y0, X3..X0, X3..X0 // - int QuarterSize = NumElems/4; - int HalfSize = QuarterSize*2; - for (int i = 0; i < QuarterSize; ++i) - if (!isUndefOrInRange(Mask[i], 0, HalfSize)) - return false; - for (int i = QuarterSize; i < QuarterSize*2; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize)) - return false; - - // The mask of the second half must be the same as the first but with - // the appropriate offsets. This works in the same way as VPERMILPS - // works with masks. - for (int i = QuarterSize*2; i < QuarterSize*3; ++i) { - if (!isUndefOrInRange(Mask[i], HalfSize, NumElems)) - return false; - int FstHalfIdx = i-HalfSize; - if (Mask[FstHalfIdx] < 0) - continue; - if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize)) - return false; - } - for (int i = QuarterSize*3; i < NumElems; ++i) { - if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2)) - return false; - int FstHalfIdx = i-HalfSize; - if (Mask[FstHalfIdx] < 0) - continue; - if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize)) - return false; - - } - - return true; -} - -/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VSHUFPSY instruction. -static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - EVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); - - assert(NumElems == 8 && VT.getSizeInBits() == 256 && - "Only supports v8i32 and v8f32 types"); - - int HalfSize = NumElems/2; - unsigned Mask = 0; - for (int i = 0; i != NumElems ; ++i) { - if (SVOp->getMaskElt(i) < 0) - continue; - // The mask of the first half must be equal to the second one. - unsigned Shamt = (i%HalfSize)*2; - unsigned Elt = SVOp->getMaskElt(i) % HalfSize; - Mask |= Elt << Shamt; - } - - return Mask; -} - -/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to 256-bit -/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS -/// version and the mask of the second half isn't binded with the first -/// one. -static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { - int NumElems = VT.getVectorNumElements(); - - if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) - return false; - - if (NumElems != 4) - return false; - - // VSHUFPSY divides the resulting vector into 4 chunks. + // VSHUFPDY divides the resulting vector into 4 chunks. // The sources are also splitted into 4 chunks, and each destination // chunk must come from a different source chunk. 
// // SRC1 => X3 X2 X1 X0 // SRC2 => Y3 Y2 Y1 Y0 // - // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0 + // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0 // - int QuarterSize = NumElems/4; - int HalfSize = QuarterSize*2; - for (int i = 0; i < QuarterSize; ++i) - if (!isUndefOrInRange(Mask[i], 0, HalfSize)) - return false; - for (int i = QuarterSize; i < QuarterSize*2; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize)) - return false; - for (int i = QuarterSize*2; i < QuarterSize*3; ++i) - if (!isUndefOrInRange(Mask[i], HalfSize, NumElems)) - return false; - for (int i = QuarterSize*3; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2)) - return false; + unsigned QuarterSize = NumElems/4; + unsigned HalfSize = QuarterSize*2; + for (unsigned l = 0; l != 2; ++l) { + unsigned LaneStart = l*HalfSize; + for (unsigned s = 0; s != 2; ++s) { + unsigned QuarterStart = s*QuarterSize; + unsigned Src = (Commuted) ? (1-s) : s; + unsigned SrcStart = Src*NumElems + LaneStart; + for (unsigned i = 0; i != QuarterSize; ++i) { + int Idx = Mask[i+QuarterStart+LaneStart]; + if (!isUndefOrInRange(Idx, SrcStart, SrcStart+HalfSize)) + return false; + // For VSHUFPSY, the mask of the second half must be the same as the first + // but with the appropriate offsets. This works in the same way as + // VPERMILPS works with masks. + if (NumElems == 4 || l == 0 || Mask[i+QuarterStart] < 0) + continue; + if (!isUndefOrEqual(Idx, Mask[i+QuarterStart]+HalfSize)) + return false; + } + } + } return true; } -/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VSHUFPDY instruction. -static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) { +/// getShuffleVSHUFPYImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VSHUFPSY/VSHUFPDY instructions. +static unsigned getShuffleVSHUFPYImmediate(SDNode *N) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); EVT VT = SVOp->getValueType(0); int NumElems = VT.getVectorNumElements(); - assert(NumElems == 4 && VT.getSizeInBits() == 256 && - "Only supports v4i64 and v4f64 types"); + assert(VT.getSizeInBits() == 256 && "Only supports 256-bit types"); + assert((NumElems == 4 || NumElems == 8) && "Only supports v4 and v8 types"); int HalfSize = NumElems/2; + unsigned Mul = (NumElems == 8) ? 2 : 1; unsigned Mask = 0; - for (int i = 0; i != NumElems ; ++i) { - if (SVOp->getMaskElt(i) < 0) + for (int i = 0; i != NumElems; ++i) { + int Elt = SVOp->getMaskElt(i); + if (Elt < 0) continue; - int Elt = SVOp->getMaskElt(i) % HalfSize; - Mask |= Elt << i; + Elt %= HalfSize; + unsigned Shamt = i; + // For VSHUFPSY, the mask of the first half must be equal to the second one. + if (NumElems == 8) Shamt %= HalfSize; + Mask |= Elt << (Shamt*Mul); } return Mask; @@ -3409,8 +3345,8 @@ static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) { /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. 
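Since CommuteVectorShuffleMask, declared just below, gets reused by several later hunks (including the rewritten horizontal-op matcher at the end of this file), a concrete picture of what it does helps: mask entries below NumElems select from the first operand, entries from NumElems upward select from the second, and commuting moves every defined entry to the other operand while leaving undef (negative) entries alone. A standalone equivalent with a worked example:

    #include <vector>

    // shuffle(A, B, <0, 4, 1, 5>)  is the same as  shuffle(B, A, <4, 0, 5, 1>).
    void commuteMask(std::vector<int> &Mask, unsigned NumElems) {
      for (size_t i = 0; i != Mask.size(); ++i) {
        int Idx = Mask[i];
        if (Idx < 0)
          continue;                                    // undef stays undef
        Mask[i] = Idx < (int)NumElems ? Idx + (int)NumElems
                                      : Idx - (int)NumElems;
      }
    }
    // commuteMask({0, 4, 1, 5}, 4) turns the mask into {4, 0, 5, 1}.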
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); +static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, + unsigned NumElems) { for (unsigned i = 0; i != NumElems; ++i) { int idx = Mask[i]; if (idx < 0) @@ -3422,31 +3358,13 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { } } -/// isCommutedVSHUFP() - Return true if swapping operands will -/// allow to use the "vshufpd" or "vshufps" instruction -/// for 256-bit vectors -static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { - - unsigned NumElems = VT.getVectorNumElements(); - if ((VT.getSizeInBits() != 256) || ((NumElems != 4) && (NumElems != 8))) - return false; - - SmallVector<int, 8> CommutedMask; - for (unsigned i = 0; i < NumElems; ++i) - CommutedMask.push_back(Mask[i]); - - CommuteVectorShuffleMask(CommutedMask, VT); - return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget): - isVSHUFPSYMask(CommutedMask, VT, Subtarget); -} - - /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 128-bit -/// SHUFPS and SHUFPD. -static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { - int NumElems = VT.getVectorNumElements(); +/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be +/// reverse of what x86 shuffles want. +static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool Commuted = false) { + unsigned NumElems = VT.getVectorNumElements(); if (VT.getSizeInBits() != 128) return false; @@ -3454,12 +3372,14 @@ static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { if (NumElems != 2 && NumElems != 4) return false; - int Half = NumElems / 2; - for (int i = 0; i < Half; ++i) - if (!isUndefOrInRange(Mask[i], 0, NumElems)) + unsigned Half = NumElems / 2; + unsigned SrcStart = Commuted ? NumElems : 0; + for (unsigned i = 0; i != Half; ++i) + if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems)) return false; - for (int i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) + SrcStart = Commuted ? 0 : NumElems; + for (unsigned i = Half; i != NumElems; ++i) + if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems)) return false; return true; @@ -3471,32 +3391,6 @@ bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { return ::isSHUFPMask(M, N->getValueType(0)); } -/// isCommutedSHUFP - Returns true if the shuffle mask is exactly -/// the reverse of what x86 shuffles want. x86 shuffles requires the lower -/// half elements to come from vector 1 (which would equal the dest.) and -/// the upper half to come from vector 2. -static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { - int NumElems = VT.getVectorNumElements(); - - if (NumElems != 2 && NumElems != 4) - return false; - - int Half = NumElems / 2; - for (int i = 0; i < Half; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) - return false; - for (int i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], 0, NumElems)) - return false; - return true; -} - -static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; - N->getMask(M); - return isCommutedSHUFPMask(M, N->getValueType(0)); -} - /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. 
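The Commuted flag folded into isSHUFPMask above is what lets the separate isCommutedSHUFPMask disappear below it: SHUFPS builds its low result half from the first source and its high half from the second, and Commuted simply accepts the mirror-image mask that becomes legal once the two operands are swapped (which CommuteVectorShuffle later performs). For v4f32, for example (the array names are ours, for illustration):

    // shufps: result = < src1[a], src1[b], src2[c], src2[d] >
    // A mask like <1, 3, 5, 7> matches directly: low half from V1, high from V2.
    static const int ShufpsMask[4]         = {1, 3, 5, 7};
    // <5, 7, 1, 3> only matches with Commuted = true, i.e. after swapping V1/V2.
    static const int CommutedShufpsMask[4] = {5, 7, 1, 3};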
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { @@ -3765,15 +3659,15 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) { return ::isMOVLMask(M, N->getValueType(0)); } -/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered +/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered /// as permutations between 128-bit chunks or halves. As an example: this /// shuffle bellow: /// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> /// The first half comes from the second half of V1 and the second half from the /// the second half of V2. -static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { - if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) +static bool isVPERM2X128Mask(const SmallVectorImpl<int> &Mask, EVT VT, + bool HasAVX) { + if (!HasAVX || VT.getSizeInBits() != 256) return false; // The shuffle result is divided into half A and half B. In total the two @@ -3801,10 +3695,9 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT, return MatchA && MatchB; } -/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VPERM2F128 instructions. -static unsigned getShuffleVPERM2F128Immediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); +/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions. +static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { EVT VT = SVOp->getValueType(0); int HalfSize = VT.getVectorNumElements()/2; @@ -3826,81 +3719,47 @@ static unsigned getShuffleVPERM2F128Immediate(SDNode *N) { return (FstHalf | (SndHalf << 4)); } -/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand +/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to VPERMILPD*. /// Note that VPERMIL mask matching is different depending whether theunderlying /// type is 32 or 64. In the VPERMILPS the high half of the mask should point /// to the same elements of the low, but to the higher half of the source. /// In VPERMILPD the two lanes could be shuffled independently of each other /// with the same restriction that lanes can't be crossed. -static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { +static bool isVPERMILPMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool HasAVX) { int NumElts = VT.getVectorNumElements(); int NumLanes = VT.getSizeInBits()/128; - if (!Subtarget->hasAVX()) + if (!HasAVX) return false; - // Only match 256-bit with 64-bit types - if (VT.getSizeInBits() != 256 || NumElts != 4) + // Only match 256-bit with 32/64-bit types + if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8)) return false; - // The mask on the high lane is independent of the low. Both can match - // any element in inside its own lane, but can't cross. 
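To make the VPERM2X128 immediate above concrete: the result is described one 128-bit half at a time, each half getting a 2-bit selector that picks one of the four source halves (0 = low half of V1, 1 = high half of V1, 2 = low half of V2, 3 = high half of V2), with the low half's selector in bits 1:0 and the high half's in bits 5:4, which is the (FstHalf | (SndHalf << 4)) the code returns. The example from the doc comment works out like this:

    // v8i32 shuffle <4,5,6,7, 12,13,14,15>:
    //   result low  half = elements 4..7   -> high half of V1 -> selector 1
    //   result high half = elements 12..15 -> high half of V2 -> selector 3
    constexpr unsigned FstHalf = 1, SndHalf = 3;
    constexpr unsigned Vperm2Imm = FstHalf | (SndHalf << 4);
    static_assert(Vperm2Imm == 0x31, "vperm2f128 $0x31, V2, V1, dst");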
int LaneSize = NumElts/NumLanes; - for (int l = 0; l < NumLanes; ++l) - for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) { - int LaneStart = l*LaneSize; - if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize)) + for (int l = 0; l != NumLanes; ++l) { + int LaneStart = l*LaneSize; + for (int i = 0; i != LaneSize; ++i) { + if (!isUndefOrInRange(Mask[i+LaneStart], LaneStart, LaneStart+LaneSize)) + return false; + if (NumElts == 4 || l == 0) + continue; + // VPERMILPS handling + if (Mask[i] < 0) + continue; + if (!isUndefOrEqual(Mask[i+LaneStart], Mask[i]+LaneSize)) return false; } - - return true; -} - -/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to VPERMILPS*. -/// Note that VPERMIL mask matching is different depending whether theunderlying -/// type is 32 or 64. In the VPERMILPS the high half of the mask should point -/// to the same elements of the low, but to the higher half of the source. -/// In VPERMILPD the two lanes could be shuffled independently of each other -/// with the same restriction that lanes can't be crossed. -static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { - unsigned NumElts = VT.getVectorNumElements(); - unsigned NumLanes = VT.getSizeInBits()/128; - - if (!Subtarget->hasAVX()) - return false; - - // Only match 256-bit with 32-bit types - if (VT.getSizeInBits() != 256 || NumElts != 8) - return false; - - // The mask on the high lane should be the same as the low. Actually, - // they can differ if any of the corresponding index in a lane is undef - // and the other stays in range. - int LaneSize = NumElts/NumLanes; - for (int i = 0; i < LaneSize; ++i) { - int HighElt = i+LaneSize; - bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts); - bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize); - - if (!HighValid || !LowValid) - return false; - if (Mask[i] < 0 || Mask[HighElt] < 0) - continue; - if (Mask[HighElt]-Mask[i] != LaneSize) - return false; } return true; } -/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VPERMILPS* instructions. -static unsigned getShuffleVPERMILPSImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); +/// getShuffleVPERMILPImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VPERMILPS/D* instructions. +static unsigned getShuffleVPERMILPImmediate(ShuffleVectorSDNode *SVOp) { EVT VT = SVOp->getValueType(0); int NumElts = VT.getVectorNumElements(); @@ -3911,43 +3770,22 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) { // where a mask will match because the same mask element is undef on the // first half but valid on the second. This would get pathological cases // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid. + unsigned Shift = (LaneSize == 4) ? 
2 : 1; unsigned Mask = 0; - for (int l = 0; l < NumLanes; ++l) { - for (int i = 0; i < LaneSize; ++i) { - int MaskElt = SVOp->getMaskElt(i+(l*LaneSize)); - if (MaskElt < 0) - continue; - if (MaskElt >= LaneSize) - MaskElt -= LaneSize; - Mask |= MaskElt << (i*2); - } + for (int i = 0; i != NumElts; ++i) { + int MaskElt = SVOp->getMaskElt(i); + if (MaskElt < 0) + continue; + MaskElt %= LaneSize; + unsigned Shamt = i; + // VPERMILPSY, the mask of the first half must be equal to the second one + if (NumElts == 8) Shamt %= LaneSize; + Mask |= MaskElt << (Shamt*Shift); } return Mask; } -/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VPERMILPD* instructions. -static unsigned getShuffleVPERMILPDImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - EVT VT = SVOp->getValueType(0); - - int NumElts = VT.getVectorNumElements(); - int NumLanes = VT.getSizeInBits()/128; - - unsigned Mask = 0; - int LaneSize = NumElts/NumLanes; - for (int l = 0; l < NumLanes; ++l) - for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) { - int MaskElt = SVOp->getMaskElt(i); - if (MaskElt < 0) - continue; - Mask |= (MaskElt-l*LaneSize) << i; - } - - return Mask; -} - /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. @@ -4035,21 +3873,18 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 256-bit /// version of MOVDDUP. -static bool isMOVDDUPYMask(ShuffleVectorSDNode *N, - const X86Subtarget *Subtarget) { - EVT VT = N->getValueType(0); +static bool isMOVDDUPYMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool HasAVX) { int NumElts = VT.getVectorNumElements(); - bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF; - if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 || - !V2IsUndef || NumElts != 4) + if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4) return false; for (int i = 0; i != NumElts/2; ++i) - if (!isUndefOrEqual(N->getMaskElt(i), 0)) + if (!isUndefOrEqual(Mask[i], 0)) return false; for (int i = NumElts/2; i != NumElts; ++i) - if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2)) + if (!isUndefOrEqual(Mask[i], NumElts/2)) return false; return true; } @@ -4164,14 +3999,13 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. 
-unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - EVT VVT = N->getValueType(0); - unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3; +static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) { + EVT VT = SVOp->getValueType(0); + unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3; int Val = 0; unsigned i, e; - for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) { + for (i = 0, e = VT.getVectorNumElements(); i != e; ++i) { Val = SVOp->getMaskElt(i); if (Val >= 0) break; @@ -4631,29 +4465,14 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::SHUFPS: case X86ISD::SHUFPD: ImmN = N->getOperand(N->getNumOperands()-1); - DecodeSHUFPSMask(NumElems, - cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: - DecodePUNPCKHMask(NumElems, ShuffleMask); - break; - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - DecodeUNPCKHPMask(VT, ShuffleMask); + DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); break; - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - DecodePUNPCKLMask(VT, ShuffleMask); + case X86ISD::UNPCKH: + DecodeUNPCKHMask(VT, ShuffleMask); break; - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: - DecodeUNPCKLPMask(VT, ShuffleMask); + case X86ISD::UNPCKL: + DecodeUNPCKLMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: DecodeMOVHLPSMask(NumElems, ShuffleMask); @@ -4686,27 +4505,12 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG, Depth+1); } - case X86ISD::VPERMILPS: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::VPERMILPSY: + case X86ISD::VPERMILP: ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(), + DecodeVPERMILPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); break; - case X86ISD::VPERMILPD: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::VPERMILPDY: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::VPERM2F128: + case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); @@ -5334,8 +5138,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DAG); } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); - EVT MiddleVT = MVT::v4i32; + unsigned NumBits = VT.getSizeInBits(); + assert((NumBits == 128 || NumBits == 256) && + "Expected an SSE or AVX value type!"); + EVT MiddleVT = NumBits == 128 ? MVT::v4i32 : MVT::v8i32; Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasXMMInt(), DAG); @@ -6256,7 +6062,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // from X. 
if (NumHi == 3) { // Normalize it so the 3 elements come from V1. - CommuteVectorShuffleMask(PermMask, VT); + CommuteVectorShuffleMask(PermMask, 4); std::swap(V1, V2); } @@ -6566,70 +6372,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v4i32: return X86ISD::PUNPCKLDQ; - case MVT::v2i64: return X86ISD::PUNPCKLQDQ; - case MVT::v8i32: - if (HasAVX2) return X86ISD::PUNPCKLDQ; - // else use fp unit for int unpack. - case MVT::v8f32: - case MVT::v4f32: return X86ISD::UNPCKLPS; - case MVT::v4i64: - if (HasAVX2) return X86ISD::PUNPCKLQDQ; - // else use fp unit for int unpack. - case MVT::v4f64: - case MVT::v2f64: return X86ISD::UNPCKLPD; - case MVT::v32i8: - case MVT::v16i8: return X86ISD::PUNPCKLBW; - case MVT::v16i16: - case MVT::v8i16: return X86ISD::PUNPCKLWD; - default: - llvm_unreachable("Unknown type for unpckl"); - } - return 0; -} - -static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v4i32: return X86ISD::PUNPCKHDQ; - case MVT::v2i64: return X86ISD::PUNPCKHQDQ; - case MVT::v8i32: - if (HasAVX2) return X86ISD::PUNPCKHDQ; - // else use fp unit for int unpack. - case MVT::v8f32: - case MVT::v4f32: return X86ISD::UNPCKHPS; - case MVT::v4i64: - if (HasAVX2) return X86ISD::PUNPCKHQDQ; - // else use fp unit for int unpack. - case MVT::v4f64: - case MVT::v2f64: return X86ISD::UNPCKHPD; - case MVT::v32i8: - case MVT::v16i8: return X86ISD::PUNPCKHBW; - case MVT::v16i16: - case MVT::v8i16: return X86ISD::PUNPCKHWD; - default: - llvm_unreachable("Unknown type for unpckh"); - } - return 0; -} - -static inline unsigned getVPERMILOpcode(EVT VT) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v4i32: - case MVT::v4f32: return X86ISD::VPERMILPS; - case MVT::v2i64: - case MVT::v2f64: return X86ISD::VPERMILPD; - case MVT::v8i32: - case MVT::v8f32: return X86ISD::VPERMILPSY; - case MVT::v4i64: - case MVT::v4f64: return X86ISD::VPERMILPDY; - default: - llvm_unreachable("Unknown type for vpermil"); - } - return 0; -} - static SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, @@ -6703,17 +6445,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); - bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; bool HasXMMInt = Subtarget->hasXMMInt(); + bool HasAVX = Subtarget->hasAVX(); bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); + assert(V1.getOpcode() != ISD::UNDEF && "Op 1 of shuffle should not be undef"); + // Vector shuffle lowering takes 3 steps: // // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable @@ -6738,11 +6482,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. 
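The unpckl_undef / unpckh_undef masks that note talks about are self-unpacks, which the next hunk now lowers straight to the unified UNPCKL/UNPCKH nodes: UNPCKL interleaves the low halves of its two inputs and UNPCKH the high halves, so feeding the same register twice duplicates each element of the respective half. For v4f32 (array names ours, for illustration):

    // unpcklps V, V : <v0, v1, v2, v3> -> <v0, v0, v1, v1>   (mask <0, 0, 1, 1>)
    // unpckhps V, V : <v0, v1, v2, v3> -> <v2, v2, v3, v3>   (mask <2, 2, 3, 3>)
    static const int UnpcklUndefMask[4] = {0, 0, 1, 1};
    static const int UnpckhUndefMask[4] = {2, 2, 3, 3};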
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() && V2IsUndef && RelaxedMayFoldVectorLoad(V1)) @@ -6754,8 +6496,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Use to match splats if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); if (X86::isPSHUFDMask(SVOp)) { // The actual implementation will match the mask in the if above and then @@ -6787,8 +6528,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (X86::isMOVLMask(SVOp)) { - if (V1IsUndef) - return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); if (!X86::isMOVLPMask(SVOp)) { @@ -6834,17 +6573,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { V2IsSplat = isSplatVector(V2.getNode()); // Canonicalize the splat or undef, if present, to be on the RHS. - if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { + if (V1IsSplat && !V2IsSplat) { Op = CommuteVectorShuffle(SVOp, DAG); SVOp = cast<ShuffleVectorSDNode>(Op); V1 = SVOp->getOperand(0); V2 = SVOp->getOperand(1); std::swap(V1IsSplat, V2IsSplat); - std::swap(V1IsUndef, V2IsUndef); Commuted = true; } - if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { + SmallVector<int, 32> M; + SVOp->getMask(M); + + if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) { // Shuffling low element of v1 into undef, just return v1. 
if (V2IsUndef) return V1; @@ -6854,13 +6595,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKLMask(SVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2, - DAG); + if (isUNPCKLMask(M, VT, HasAVX2)) + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG); - if (X86::isUNPCKHMask(SVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2, - DAG); + if (isUNPCKHMask(M, VT, HasAVX2)) + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -6884,35 +6623,30 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); if (X86::isUNPCKLMask(NewSVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG); } // Normalize the node to match x86 shuffle ops if needed - if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true) || + isVSHUFPYMask(M, VT, HasAVX, /* Commuted */ true))) return CommuteVectorShuffle(SVOp, DAG); // The checks below are all present in isShuffleMaskLegal, but they are // inlined here right now to enable us to directly emit target specific // nodes, and remove one by one until they don't return Op anymore. - SmallVector<int, 16> M; - SVOp->getMask(M); if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX())) return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, - X86::getShufflePALIGNRImmediate(SVOp), + getShufflePALIGNRImmediate(SVOp), DAG); if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { - if (VT == MVT::v2f64) - return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); - if (VT == MVT::v2i64) - return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); + if (VT == MVT::v2f64 || VT == MVT::v2i64) + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); } if (isPSHUFHWMask(M, VT)) @@ -6929,12 +6663,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, X86::getShuffleSHUFImmediate(SVOp), DAG); - if (X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); - if (X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + if (isUNPCKL_v_undef_Mask(M, VT)) + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); + if (isUNPCKH_v_undef_Mask(M, VT)) + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); //===--------------------------------------------------------------------===// // Generate target specific nodes for 128 or 256-bit shuffles only @@ -6942,44 +6674,23 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // // Handle VMOVDDUPY permutations - if (isMOVDDUPYMask(SVOp, Subtarget)) + if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX)) return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG); - // Handle VPERMILPS* 
permutations - if (isVPERMILPSMask(M, VT, Subtarget)) - return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, - getShuffleVPERMILPSImmediate(SVOp), DAG); - - // Handle VPERMILPD* permutations - if (isVPERMILPDMask(M, VT, Subtarget)) - return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, - getShuffleVPERMILPDImmediate(SVOp), DAG); + // Handle VPERMILPS/D* permutations + if (isVPERMILPMask(M, VT, HasAVX)) + return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, + getShuffleVPERMILPImmediate(SVOp), DAG); - // Handle VPERM2F128 permutations - if (isVPERM2F128Mask(M, VT, Subtarget)) - return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2, - getShuffleVPERM2F128Immediate(SVOp), DAG); + // Handle VPERM2F128/VPERM2I128 permutations + if (isVPERM2X128Mask(M, VT, HasAVX)) + return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, + V2, getShuffleVPERM2X128Immediate(SVOp), DAG); - // Handle VSHUFPSY permutations - if (isVSHUFPSYMask(M, VT, Subtarget)) + // Handle VSHUFPS/DY permutations + if (isVSHUFPYMask(M, VT, HasAVX)) return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, - getShuffleVSHUFPSYImmediate(SVOp), DAG); - - // Handle VSHUFPDY permutations - if (isVSHUFPDYMask(M, VT, Subtarget)) - return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, - getShuffleVSHUFPDYImmediate(SVOp), DAG); - - // Try to swap operands in the node to match x86 shuffle ops - if (isCommutedVSHUFPMask(M, VT, Subtarget)) { - // Now we need to commute operands. - SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG)); - V1 = SVOp->getOperand(0); - V2 = SVOp->getOperand(1); - unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp): - getShuffleVSHUFPSYImmediate(SVOp); - return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG); - } + getShuffleVSHUFPYImmediate(SVOp), DAG); //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, @@ -7888,7 +7599,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, LLVMContext *Context = DAG.getContext(); // Build some magic constants. 
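Those magic constants are the SSE2 realization of the other uint64-to-double strategy mentioned near the top of this file: 0x43300000 and 0x45300000 are the high words of the doubles 2^52 and 2^84, so splicing the low and high 32-bit halves of the integer into the mantissas of those two doubles, subtracting the unmodified constants back out, and adding the two exact halves reconstructs the value with a single rounding. A scalar rendition of the idea, assuming IEEE-754 doubles (helper names ours):

    #include <cstdint>
    #include <cstring>

    static double fromBits(uint64_t b) { double d; std::memcpy(&d, &b, sizeof d); return d; }

    double u64ToF64Sse2Style(uint64_t x) {
      uint32_t lo = static_cast<uint32_t>(x);
      uint32_t hi = static_cast<uint32_t>(x >> 32);
      double dlo = fromBits((0x43300000ULL << 32) | lo);   // == 2^52 + lo, exactly
      double dhi = fromBits((0x45300000ULL << 32) | hi);   // == 2^84 + hi * 2^32, exactly
      double bigLo = fromBits(0x4330000000000000ULL);      // 2^52
      double bigHi = fromBits(0x4530000000000000ULL);      // 2^84
      return (dhi - bigHi) + (dlo - bigLo);                // one rounding, in the final add
    }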
- std::vector<Constant*> CV0; + SmallVector<Constant*,4> CV0; CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000))); CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000))); CV0.push_back(ConstantInt::get(*Context, APInt(32, 0))); @@ -7896,7 +7607,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, Constant *C0 = ConstantVector::get(CV0); SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16); - std::vector<Constant*> CV1; + SmallVector<Constant*,2> CV1; CV1.push_back( ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); CV1.push_back( @@ -8176,17 +7887,13 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, EVT EltVT = VT; if (VT.isVector()) EltVT = VT.getVectorElementType(); - std::vector<Constant*> CV; + SmallVector<Constant*,4> CV; if (EltVT == MVT::f64) { Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); - CV.push_back(C); - CV.push_back(C); + CV.assign(2, C); } else { Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); - CV.push_back(C); - CV.push_back(C); - CV.push_back(C); - CV.push_back(C); + CV.assign(4, C); } Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -8201,19 +7908,18 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); EVT EltVT = VT; - if (VT.isVector()) + unsigned NumElts = VT == MVT::f64 ? 2 : 4; + if (VT.isVector()) { EltVT = VT.getVectorElementType(); - std::vector<Constant*> CV; + NumElts = VT.getVectorNumElements(); + } + SmallVector<Constant*,8> CV; if (EltVT == MVT::f64) { Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); - CV.push_back(C); - CV.push_back(C); + CV.assign(NumElts, C); } else { Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); - CV.push_back(C); - CV.push_back(C); - CV.push_back(C); - CV.push_back(C); + CV.assign(NumElts, C); } Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -8221,11 +7927,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo::getConstantPool(), false, false, false, 16); if (VT.isVector()) { + MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64; return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(ISD::XOR, dl, MVT::v2i64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, + DAG.getNode(ISD::XOR, dl, XORVT, + DAG.getNode(ISD::BITCAST, dl, XORVT, Op.getOperand(0)), - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask))); + DAG.getNode(ISD::BITCAST, dl, XORVT, Mask))); } else { return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask); } @@ -8254,7 +7961,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // type, and that won't be f80 since that is not custom lowered. // First get the sign bit of second operand. 
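FABS, FNEG and the FCOPYSIGN lowering this hunk continues into are all bit manipulations on the IEEE sign bit, which is why the constant pools above hold ~(1 << 63) and (1 << 63) (and their 32-bit analogues): fabs clears the sign with an AND, fneg flips it with an XOR, and copysign combines one operand's magnitude bits with the other's sign bit. In scalar form (helper names ours):

    #include <cstdint>
    #include <cstring>

    static uint64_t bitsOf(double d)     { uint64_t b; std::memcpy(&b, &d, sizeof b); return b; }
    static double   fromBits(uint64_t b) { double d;   std::memcpy(&d, &b, sizeof d); return d; }

    double fabsBits(double x) { return fromBits(bitsOf(x) & ~(1ULL << 63)); }   // andpd with ~sign
    double fnegBits(double x) { return fromBits(bitsOf(x) ^  (1ULL << 63)); }   // xorpd with sign
    double copysignBits(double mag, double sgn) {
      return fromBits((bitsOf(mag) & ~(1ULL << 63)) | (bitsOf(sgn) & (1ULL << 63)));
    }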
- std::vector<Constant*> CV; + SmallVector<Constant*,4> CV; if (SrcVT == MVT::f64) { CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)))); CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); @@ -9253,7 +8960,7 @@ SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() || - EnableSegmentedStacks) && + getTargetMachine().Options.EnableSegmentedStacks) && "This should be used only on Windows targets or when segmented stacks " "are being used"); assert(!Subtarget->isTargetEnvMacho() && "Not implemented"); @@ -9267,7 +8974,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, bool Is64Bit = Subtarget->is64Bit(); EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32; - if (EnableSegmentedStacks) { + if (getTargetMachine().Options.EnableSegmentedStacks) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -9403,7 +9110,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. - assert(!UseSoftFloat && + assert(!getTargetMachine().Options.UseSoftFloat && !(DAG.getMachineFunction() .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) && Subtarget->hasXMM()); @@ -10472,7 +10179,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); @@ -10487,13 +10194,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); // return pblendv(r, r+r, a); R = DAG.getNode(ISD::VSELECT, dl, VT, Op, - R, DAG.getNode(ISD::ADD, dl, VT, R, R)); + DAG.getNode(ISD::ADD, dl, VT, R, R), R); return R; } @@ -11194,6 +10901,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::HADD: return "X86ISD::HADD"; + case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::FMAX: return "X86ISD::FMAX"; @@ -11266,24 +10975,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD"; case X86ISD::MOVSD: return "X86ISD::MOVSD"; case X86ISD::MOVSS: return "X86ISD::MOVSS"; - case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; - case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; - case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; - case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; - case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; - case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; - case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; - case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; - case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW"; - case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; - case 
X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; - case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; + case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; - case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS"; - case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY"; - case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD"; - case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY"; - case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128"; + case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; + case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; @@ -11391,7 +11087,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) - return isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()); + return false; // FIXME: pshufb, blends, shifts. return (VT.getVectorNumElements() == 2 || @@ -11419,7 +11115,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, return (isMOVLMask(Mask, VT) || isCommutedMOVLMask(Mask, VT, true) || isSHUFPMask(Mask, VT) || - isCommutedSHUFPMask(Mask, VT)); + isSHUFPMask(Mask, VT, /* Commuted */ true)); } return false; } @@ -12289,7 +11985,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); - assert(EnableSegmentedStacks); + assert(getTargetMachine().Options.EnableSegmentedStacks); unsigned TlsReg = Is64Bit ? X86::FS : X86::GS; unsigned TlsOffset = Is64Bit ? 0x70 : 0x30; @@ -13169,7 +12865,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; std::swap(LHS, RHS); @@ -13179,7 +12875,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, case ISD::SETOLE: // Converting this to a min would handle comparisons between positive // and negative zero incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) break; Opcode = X86ISD::FMIN; @@ -13197,7 +12893,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, case ISD::SETOGE: // Converting this to a max would handle comparisons between positive // and negative zero incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) break; Opcode = X86ISD::FMAX; @@ -13207,7 +12903,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. 
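Every branch of this combine is fenced behind UnsafeFPMath or the known-never-NaN / known-never-zero checks for the same underlying reason: x86 MINPS/MAXPS are not symmetric in their operands. When the compare is unordered (a NaN is involved) or the operands compare equal (which is how +0.0 and -0.0 compare), the instruction returns its second operand, so turning a select into FMIN/FMAX, or swapping its operands, can silently change which zero or which NaN comes out. A scalar model of MINSS shows the asymmetry:

    #include <cmath>
    #include <cstdio>

    // x86 MINSS computes (a < b) ? a : b, so the second operand wins
    // both on unordered compares and on +0.0 vs -0.0.
    static float x86Minss(float a, float b) { return (a < b) ? a : b; }

    int main() {
      std::printf("%g %g\n", x86Minss(+0.0f, -0.0f), x86Minss(-0.0f, +0.0f)); // -0 0
      std::printf("%g %g\n", x86Minss(NAN, 1.0f),    x86Minss(1.0f, NAN));    // 1 nan
      return 0;
    }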
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; std::swap(LHS, RHS); @@ -13233,7 +12929,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a min would handle comparisons between positive // and negative zero incorrectly, and swapping the operands would // cause it to handle NaNs incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; @@ -13243,7 +12939,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETUGT: // Converting this to a min would handle NaNs incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; Opcode = X86ISD::FMIN; @@ -13268,7 +12964,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle comparisons between positive // and negative zero incorrectly, and swapping the operands would // cause it to handle NaNs incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; @@ -14048,7 +13744,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X); Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y); Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask); - Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y); + Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X); return DAG.getNode(ISD::BITCAST, DL, VT, Mask); } } @@ -14232,7 +13928,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // If we are saving a concatination of two XMM registers, perform two stores. + // If we are saving a concatenation of two XMM registers, perform two stores. // This is better in Sandy Bridge cause one 256-bit mem op is done via two // 128-bit ones. If in the future the cost becomes only one memory access the // first version would be better. @@ -14342,7 +14038,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); - bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps + bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasXMMInt(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && @@ -14458,7 +14154,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, /// set to A, RHS to B, and the routine returns 'true'. /// Note that the binary operation should have the property that if one of the /// operands is UNDEF then the result is UNDEF. 
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { +static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) { // Look for the following pattern: if // A = < float a0, float a1, float a2, float a3 > // B = < float b0, float b1, float b2, float b3 > @@ -14474,7 +14170,18 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { return false; EVT VT = LHS.getValueType(); - unsigned N = VT.getVectorNumElements(); + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for horizontal add/sub"); + + // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to + // operate independently on 128-bit lanes. + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts / NumLanes; + assert((NumLaneElts % 2 == 0) && + "Vector type should have an even number of elements in each lane"); + unsigned HalfLaneElts = NumLaneElts/2; // View LHS in the form // LHS = VECTOR_SHUFFLE A, B, LMask @@ -14483,7 +14190,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { // NOTE: in what follows a default initialized SDValue represents an UNDEF of // type VT. SDValue A, B; - SmallVector<int, 8> LMask(N); + SmallVector<int, 16> LMask(NumElts); if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) { if (LHS.getOperand(0).getOpcode() != ISD::UNDEF) A = LHS.getOperand(0); @@ -14493,14 +14200,14 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { } else { if (LHS.getOpcode() != ISD::UNDEF) A = LHS; - for (unsigned i = 0; i != N; ++i) + for (unsigned i = 0; i != NumElts; ++i) LMask[i] = i; } // Likewise, view RHS in the form // RHS = VECTOR_SHUFFLE C, D, RMask SDValue C, D; - SmallVector<int, 8> RMask(N); + SmallVector<int, 16> RMask(NumElts); if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) { if (RHS.getOperand(0).getOpcode() != ISD::UNDEF) C = RHS.getOperand(0); @@ -14510,7 +14217,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { } else { if (RHS.getOpcode() != ISD::UNDEF) C = RHS; - for (unsigned i = 0; i != N; ++i) + for (unsigned i = 0; i != NumElts; ++i) RMask[i] = i; } @@ -14525,30 +14232,28 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { // If A and B occur in reverse order in RHS, then "swap" them (which means // rewriting the mask). if (A != C) - for (unsigned i = 0; i != N; ++i) { - unsigned Idx = RMask[i]; - if (Idx < N) - RMask[i] += N; - else if (Idx < 2*N) - RMask[i] -= N; - } + CommuteVectorShuffleMask(RMask, NumElts); // At this point LHS and RHS are equivalent to // LHS = VECTOR_SHUFFLE A, B, LMask // RHS = VECTOR_SHUFFLE A, B, RMask // Check that the masks correspond to performing a horizontal operation. - for (unsigned i = 0; i != N; ++i) { - unsigned LIdx = LMask[i], RIdx = RMask[i]; + for (unsigned i = 0; i != NumElts; ++i) { + int LIdx = LMask[i], RIdx = RMask[i]; // Ignore any UNDEF components. - if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N)) - || (!B.getNode() && (LIdx >= N || RIdx >= N))) + if (LIdx < 0 || RIdx < 0 || + (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) || + (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts))) continue; // Check that successive elements are being operated on. If not, this is // not a horizontal operation. 
- if (!(LIdx == 2*i && RIdx == 2*i + 1) && - !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i) + unsigned Src = (i/HalfLaneElts) % 2; // each lane is split between srcs + unsigned LaneStart = (i/NumLaneElts) * NumLaneElts; + int Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart; + if (!(LIdx == Index && RIdx == Index + 1) && + !(IsCommutative && LIdx == Index + 1 && RIdx == Index)) return false; } @@ -14565,7 +14270,8 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, true)) return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); @@ -14579,7 +14285,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal subs from subs of shuffles. - if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, false)) return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); @@ -14783,7 +14490,8 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG, SDValue Op1 = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) && + if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || + (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && isHorizontalBinOp(Op0, Op1, true)) return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1); @@ -14815,8 +14523,9 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG, // Try to synthesize horizontal adds from adds of shuffles.
EVT VT = N->getValueType(0); - if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) && - isHorizontalBinOp(Op0, Op1, false)) + if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || + (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && + isHorizontalBinOp(Op0, Op1, false)) return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1); return OptimizeConditionalInDecrement(N, DAG); @@ -14857,18 +14566,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::SHUFPS: // Handle all target specific shuffles case X86ISD::SHUFPD: case X86ISD::PALIGN: - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: + case X86ISD::UNPCKH: + case X86ISD::UNPCKL: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: @@ -14876,11 +14575,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::VPERMILPS: - case X86ISD::VPERMILPSY: - case X86ISD::VPERMILPD: - case X86ISD::VPERMILPDY: - case X86ISD::VPERM2F128: + case X86ISD::VPERMILP: + case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ccff3a5..cfc1f88 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -273,23 +273,10 @@ namespace llvm { MOVLPD, MOVSD, MOVSS, - UNPCKLPS, - UNPCKLPD, - UNPCKHPS, - UNPCKHPD, - PUNPCKLBW, - PUNPCKLWD, - PUNPCKLDQ, - PUNPCKLQDQ, - PUNPCKHBW, - PUNPCKHWD, - PUNPCKHDQ, - PUNPCKHQDQ, - VPERMILPS, - VPERMILPSY, - VPERMILPD, - VPERMILPDY, - VPERM2F128, + UNPCKL, + UNPCKH, + VPERMILP, + VPERM2X128, VBROADCAST, // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, @@ -468,10 +455,6 @@ namespace llvm { /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction. unsigned getShufflePSHUFLWImmediate(SDNode *N); - /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle - /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. - unsigned getShufflePALIGNRImmediate(SDNode *N); - /// getExtractVEXTRACTF128Immediate - Return the appropriate /// immediate to extract the specified EXTRACT_SUBVECTOR index /// with VEXTRACTF128 instructions.
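A note on the isHorizontalBinOp rewrite above: because AVX horizontal add/sub instructions operate on each 128-bit lane independently, the expected shuffle indices are now computed per lane instead of as a flat 2*i / 2*i+1 pair. The standalone C++ sketch below reproduces that index arithmetic outside of SelectionDAG; the helper name is invented for illustration and is not part of the patch.

#include <cassert>

// Per-element check: does (LIdx, RIdx) describe the operand pair that a
// horizontal add/sub would consume for result element i?  Mirrors the index
// computation in the new isHorizontalBinOp.
static bool isExpectedHorizontalPair(unsigned i, int LIdx, int RIdx,
                                     unsigned NumElts, unsigned NumLaneElts,
                                     bool IsCommutative) {
  assert(NumLaneElts % 2 == 0 && "each lane needs an even element count");
  unsigned HalfLaneElts = NumLaneElts / 2;
  unsigned Src = (i / HalfLaneElts) % 2;                // lane is split between srcs
  unsigned LaneStart = (i / NumLaneElts) * NumLaneElts;
  int Index = 2 * (i % HalfLaneElts) + NumElts * Src + LaneStart;
  if (LIdx == Index && RIdx == Index + 1)
    return true;
  return IsCommutative && LIdx == Index + 1 && RIdx == Index;
}

For example, with v8f32 (NumElts = 8, NumLaneElts = 4), result element 2 expects the pair (8, 9), i.e. b0 and b1 of the second source, which is what vhaddps produces in its low lane.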
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index d868773..f443088 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -58,3 +58,391 @@ let isAsmParserOnly = 1 in { defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">; defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W; } + +//===----------------------------------------------------------------------===// +// FMA4 - AMD 4 operand Fused Multiply-Add instructions +//===----------------------------------------------------------------------===// + + +multiclass fma4s<bits<8> opc, string OpcodeStr> { + def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>; + +} + +multiclass fma4p<bits<8> opc, string OpcodeStr> { + def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>; + def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, f256mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>; +} + +let isAsmParserOnly = 1 in { + defm VFMADDSS4 : fma4s<0x6A, "vfmaddss">; + defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd">; + defm VFMADDPS4 : fma4p<0x68, "vfmaddps">; + defm VFMADDPD4 : fma4p<0x69, "vfmaddpd">; + defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss">; + defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd">; + defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps">; + defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd">; + defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss">; + defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd">; + defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps">; + defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd">; + defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss">; + defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd">; + defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps">; + defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd">; + defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps">; + defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd">; + defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps">; + defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd">; +} + +// FMA4 Intrinsics patterns + +// VFMADD +def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, VR128:$src3), + 
(VFMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFMSUB +def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBPS4rr VR128:$src1, VR128:$src2, 
VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFNMADD +def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFNMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFNMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFNMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFNMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFNMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFNMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFNMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFNMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFNMADDPS4rrY VR256:$src1, VR256:$src2, 
VR256:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFNMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFNMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFNMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFNMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFNMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFNMSUB +def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFNMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFNMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFNMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFNMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFNMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFNMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFNMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFNMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFNMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFNMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFNMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFNMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFNMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFNMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFMADDSUB +def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, 
VR128:$src3), + (VFMADDSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMADDSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMADDSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMADDSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMADDSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMADDSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMADDSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFMADDSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFMADDSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMADDSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFMADDSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFMADDSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFMSUBADD +def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMSUBADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMSUBADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMSUBADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMSUBADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMSUBADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFMSUBADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFMSUBADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMSUBADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFMSUBADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + 
VR256:$src3), + (VFMSUBADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index ecd6a93..7ba3639 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -110,6 +110,8 @@ class A7 { bits<5> Prefix = 16; } class T8XD { bits<5> Prefix = 17; } class T8XS { bits<5> Prefix = 18; } class TAXD { bits<5> Prefix = 19; } +class XOP8 { bits<5> Prefix = 20; } +class XOP9 { bits<5> Prefix = 21; } class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } @@ -118,7 +120,8 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } class VEX_L { bit hasVEX_L = 1; } class VEX_LIG { bit ignoresVEX_L = 1; } class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } - +class XOP_W { bit hasXOP_WPrefix = 1; } +class XOP { bit hasXOP_Prefix = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, Domain d = GenericDomain> : Instruction { @@ -158,6 +161,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasVEX_L = 0; // Does this inst use large (256-bit) registers? bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding? + bit hasXOP_WPrefix = 0; // Same bit as VEX_W, but used for swapping operands + bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix? // TSFlags layout should be kept in sync with X86InstrInfo.h. let TSFlags{5-0} = FormBits; @@ -179,6 +184,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{38} = hasVEX_L; let TSFlags{39} = ignoresVEX_L; let TSFlags{40} = has3DNow0F0FOpcode; + let TSFlags{41} = hasXOP_WPrefix; + let TSFlags{42} = hasXOP_Prefix; } class PseudoI<dag oops, dag iops, list<dag> pattern> @@ -332,6 +339,10 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB, Requires<[HasAVX]>; +class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + Requires<[HasXMM]>; // SSE2 Instruction Templates: // @@ -496,6 +507,30 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize, VEX_4V, Requires<[HasFMA3]>; +// FMA4 Instruction Templates +class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>; + +// XOP 2, 3 and 4 Operand Instruction Template +class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, + XOP, XOP9, Requires<[HasXOP]>; + +// XOP 2, 3 and 4 Operand Instruction Templates with imm byte +class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, + XOP, XOP8, Requires<[HasXOP]>; + +// XOP 5 operand instruction (VEX encoding!) +class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; + // X86-64 Instruction templates... 
// diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 791bbe6..cd13bc4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -130,28 +130,12 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; -def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; +def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; -def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; -def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; +def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; -def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; -def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; -def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; -def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; - -def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; -def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; -def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; -def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; - -def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>; -def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>; -def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>; -def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>; - -def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>; +def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; @@ -363,12 +347,6 @@ def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); }]>; -// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to -// a PALIGNR imm. -def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShufflePALIGNRImmediate(N)); -}]>; - // EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index // to VEXTRACTF128 imm. def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{ diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 24c4a53..7d1b9a1 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1528,9 +1528,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); // Build and insert into an implicit UNDEF value. This is OK because // well be shifting and then extracting the lower 16-bits. - BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); + BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2); InsMI2 = - BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) + BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) .addReg(leaInReg2, RegState::Define, X86::sub_16bit) .addReg(Src2, getKillRegState(isKill2)); addRegReg(MIB, leaInReg, true, leaInReg2, true); @@ -2040,13 +2040,12 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { } bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isTerminator()) return false; + if (!MI->isTerminator()) return false; // Conditional branch is a special case. 
- if (MCID.isBranch() && !MCID.isBarrier()) + if (MI->isBranch() && !MI->isBarrier()) return true; - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return true; return !isPredicated(MI); } @@ -2072,7 +2071,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // A terminator that isn't a branch can't easily be handled by this // analysis. - if (!I->getDesc().isBranch()) + if (!I->isBranch()) return true; // Handle unconditional branches. @@ -2556,6 +2555,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); switch (MI->getOpcode()) { case X86::V_SET0: + case X86::FsFLD0SS: + case X86::FsFLD0SD: return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); @@ -2771,7 +2772,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) { case X86::RCPSSr: case X86::RCPSSr_Int: case X86::ROUNDSDr: + case X86::ROUNDSDr_Int: case X86::ROUNDSSr: + case X86::ROUNDSSr_Int: case X86::RSQRTSSr: case X86::RSQRTSSr_Int: case X86::SQRTSSr: @@ -2783,7 +2786,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) { case X86::Int_VCVTSS2SDrr: case X86::VRCPSSr: case X86::VROUNDSDr: + case X86::VROUNDSDr_Int: case X86::VROUNDSSr: + case X86::VROUNDSSr_Int: case X86::VRSQRTSSr: case X86::VSQRTSSr: return true; @@ -2911,11 +2916,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = 16; break; case X86::FsFLD0SD: - case X86::VFsFLD0SD: Alignment = 8; break; case X86::FsFLD0SS: - case X86::VFsFLD0SS: Alignment = 4; break; default: @@ -2950,9 +2953,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::AVX_SETALLONES: case X86::AVX2_SETALLONES: case X86::FsFLD0SD: - case X86::FsFLD0SS: - case X86::VFsFLD0SD: - case X86::VFsFLD0SS: { + case X86::FsFLD0SS: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. 
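Context for the FsFLD0SS/FsFLD0SD hunks above and below: with the separate VFsFLD0SS/VFsFLD0SD definitions removed (see the X86InstrSSE.td hunk later in this patch), a single pair of pseudos covers both SSE and AVX. The zero idiom is chosen late in expandPostRAPseudo, and when the value is instead folded as a load, foldMemoryOperandImpl materializes a 0.0 constant-pool entry with the natural scalar alignment. A rough C++ sketch of that split, using stand-in names only:

// Stand-in opcode names; the real code uses X86::XORPSrr / X86::VXORPSrr and
// the MachineConstantPool, as in the surrounding hunks.
enum ZeroIdiom { XorPS, VXorPS };

static ZeroIdiom scalarZeroExpansion(bool HasAVX) {
  // Register form: both pseudos expand to a self-xor of the destination, so
  // only the encoding (legacy vs. VEX) has to be picked at expansion time.
  return HasAVX ? VXorPS : XorPS;
}

static unsigned scalarZeroFoldAlignment(bool IsDouble) {
  // Folded form: load 0.0f / 0.0 from the constant pool instead.
  return IsDouble ? 8u : 4u; // FsFLD0SD vs. FsFLD0SS
}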
@@ -2978,9 +2979,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineConstantPool &MCP = *MF.getConstantPool(); Type *Ty; unsigned Opc = LoadMI->getOpcode(); - if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS) + if (Opc == X86::FsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); - else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD) + else if (Opc == X86::FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); @@ -3569,7 +3570,13 @@ static const unsigned ReplaceableInstrsAVX2[][3] = { { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm }, { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, - { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr } + { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }, + { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr }, + { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr }, + { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm }, + { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr }, + { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm }, + { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr } }; // FIXME: Some shuffle and unpack instructions have equivalents in different diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 35631d5..0bc3afa 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -473,6 +473,7 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; +def HasXMM : Predicate<"Subtarget->hasXMM()">; def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; @@ -480,6 +481,7 @@ def HasAES : Predicate<"Subtarget->hasAES()">; def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; +def HasXOP : Predicate<"Subtarget->hasXOP()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; @@ -1502,6 +1504,9 @@ include "X86InstrFragmentsSIMD.td" // FMA - Fused Multiply-Add support (requires FMA) include "X86InstrFMA.td" +// XOP +include "X86InstrXOP.td" + // SSE, MMX and 3DNow! vector support. include "X86InstrSSE.td" include "X86InstrMMX.td" diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7cadac1..345f606 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -240,21 +240,13 @@ let Predicates = [HasAVX] in { } // Alias instructions that map fld0 to pxor for sse. -// FIXME: Set encoding to pseudo! 
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in { - def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; - def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasSSE2]>, TB, OpSize; - def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasAVX]>, TB, OpSize, VEX_4V; - def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasAVX]>, TB, OpSize, VEX_4V; +// This is expanded by ExpandPostRAPseudos. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isPseudo = 1 in { + def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>; + def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>; } //===----------------------------------------------------------------------===// @@ -569,6 +561,16 @@ let Predicates = [HasAVX] in { (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + + // Move low f32 and clear high bits. + def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSSrr (v4f32 (V_SET0)), + (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>; + def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>; } let AddedComplexity = 20 in { @@ -596,6 +598,9 @@ let Predicates = [HasAVX] in { // Represent the same patterns above but in the form they appear for // 256-bit types + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; @@ -613,6 +618,15 @@ let Predicates = [HasAVX] in { (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), sub_xmm)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + + // Move low f64 and clear high bits. + def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSDrr (v2f64 (V_SET0)), + (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>; // Extract and store. 
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), @@ -634,6 +648,16 @@ let Predicates = [HasAVX] in { (VMOVSSrr (v4f32 VR128:$src1), (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + // 256-bit variants + def : Pat<(v8i32 (X86Movsd VR256:$src1, VR256:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss), + (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>; + def : Pat<(v8f32 (X86Movsd VR256:$src1, VR256:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss), + (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>; + // Shuffle with VMOVSD def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), (VMOVSDrr VR128:$src1, FR64:$src2)>; @@ -650,6 +674,17 @@ let Predicates = [HasAVX] in { (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; + // 256-bit variants + def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd), + (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>; + def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd), + (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>; + + // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the @@ -657,6 +692,9 @@ let Predicates = [HasAVX] in { def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2), + sub_sd))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; @@ -761,6 +799,22 @@ let isCodeGenOnly = 1 in { "movupd\t{$src, $dst|$dst, $src}", []>, VEX; } +let Predicates = [HasAVX] in { +def : Pat<(v8i32 (X86vzmovl + (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; +def : Pat<(v4i64 (X86vzmovl + (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; +def : Pat<(v8f32 (X86vzmovl + (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; +def : Pat<(v4f64 (X86vzmovl + (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; +} + + def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), (VMOVUPSYmr addr:$dst, VR256:$src)>; @@ -1156,14 +1210,17 @@ let Predicates = [HasAVX] in { (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (VMOVHPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (VMOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), (VMOVHPSrm VR128:$src1, addr:$src2)>; - // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem + // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses 
through a bitcast. One use disappears at isel time and the // fold opportunity reappears. - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (VMOVHPDrm VR128:$src1, addr:$src2)>; @@ -1174,10 +1231,10 @@ let Predicates = [HasAVX] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst), (VMOVHPSmr addr:$dst, VR128:$src)>; def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst), (VMOVHPDmr addr:$dst, VR128:$src)>; } @@ -1189,21 +1246,24 @@ let Predicates = [HasSSE1] in { (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (MOVHPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (X86vzload addr:$src2)))), (MOVHPSrm VR128:$src1, addr:$src2)>; // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst), (MOVHPSmr addr:$dst, VR128:$src)>; } let Predicates = [HasSSE2] in { - // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem + // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; @@ -1214,7 +1274,7 @@ let Predicates = [HasSSE2] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst), (MOVHPDmr addr:$dst, VR128:$src)>; } @@ -1943,7 +2003,7 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // whenever possible to avoid declaring two versions of each one. 
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), (VCVTDQ2PSYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)), +def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), (VCVTDQ2PSYrm addr:$src)>; def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), @@ -2430,27 +2490,27 @@ let AddedComplexity = 10 in { } // AddedComplexity let Predicates = [HasSSE1] in { - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)), (UNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)), (UNPCKHPSrr VR128:$src1, VR128:$src2)>; } let Predicates = [HasSSE2] in { - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)), (UNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)), (UNPCKHPDrr VR128:$src1, VR128:$src2)>; - // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. 
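The SSE1/SSE2 pattern updates above and the AVX block below all follow from collapsing the per-width unpack nodes into the single X86Unpckl/X86Unpckh pair: the interleave an unpack performs follows the same rule at every element width, so the node plus the pattern's value type (v4f32, v2f64, v16i8, ...) is enough to pick the concrete instruction. A small illustrative helper, not part of the patch, that builds the shuffle mask such a node stands for:

#include <vector>

// "Unpack low" mask for any element/lane count; "unpack high" is the same
// with an extra offset of NumLaneElts/2 into each lane.
static std::vector<int> unpackLoMask(unsigned NumElts, unsigned NumLanes) {
  std::vector<int> Mask;
  unsigned NumLaneElts = NumElts / NumLanes;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
    for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
      Mask.push_back(Lane * NumLaneElts + i);           // element from src1
      Mask.push_back(Lane * NumLaneElts + i + NumElts); // element from src2
    }
  return Mask;
}

unpackLoMask(4, 1) yields <0,4,1,5> (unpcklps/punpckldq), unpackLoMask(16, 1) yields <0,16,1,17, ...> (punpcklbw), and unpackLoMask(8, 2) yields the lane-split <0,8,1,9, 4,12,5,13> used by the 256-bit forms.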
@@ -2463,59 +2523,43 @@ let Predicates = [HasSSE2] in { } let Predicates = [HasAVX] in { - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)), (VUNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)), (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))), - (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)), - (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)), - (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, 
addr:$src2)>; - def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. @@ -2869,7 +2913,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins ssmem:$src1, VR128:$src2), + (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; } @@ -3198,13 +3242,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), //===----------------------------------------------------------------------===// // Prefetch intrinsic. -def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), +def PREFETCHT0 : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>; -def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), +def PREFETCHT1 : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src), "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>; -def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), +def PREFETCHT2 : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src), "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>; -def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), +def PREFETCHNTA : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>; // Flush cache @@ -3652,6 +3696,8 @@ defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3666,17 +3712,6 @@ let ExeDomain = SSEPackedInt in { VEX_4V; // PSRADQri doesn't exist in SSE[1-3]. } - def VPANDNrr : PDI<0xDF, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V; - - def VPANDNrm : PDI<0xDF, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (X86andnp VR128:$src1, - (memopv2i64 addr:$src2)))]>, VEX_4V; } } @@ -3714,6 +3749,8 @@ defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3728,17 +3765,6 @@ let ExeDomain = SSEPackedInt in { VEX_4V; // PSRADQYri doesn't exist in SSE[1-3]. 
} - def VPANDNYrr : PDI<0xDF, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, - (v4i64 (X86andnp VR256:$src1, VR256:$src2)))]>,VEX_4V; - - def VPANDNYrm : PDI<0xDF, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (X86andnp VR256:$src1, - (memopv4i64 addr:$src2)))]>, VEX_4V; } } @@ -3776,6 +3802,8 @@ defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64, i128mem, 1>; defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64, i128mem, 1>; +defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64, + i128mem, 0>; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3787,14 +3815,6 @@ let ExeDomain = SSEPackedInt in { (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "psrldq\t{$src2, $dst|$dst, $src2}", []>; // PSRADQri doesn't exist in SSE[1-3]. - def PANDNrr : PDI<0xDF, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; - - let mayLoad = 1 in - def PANDNrm : PDI<0xDF, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; } } } // Constraints = "$src1 = $dst" @@ -4198,66 +4218,88 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, } let Predicates = [HasAVX] in { - defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw, + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, bc_v16i8, 0>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd, + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, bc_v8i16, 0>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, bc_v4i32, 0>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq, + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, bc_v2i64, 0>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, bc_v16i8, 0>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd, + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, bc_v8i16, 0>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, bc_v4i32, 0>, VEX_4V; - defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq, + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, bc_v2i64, 0>, VEX_4V; } let Predicates = [HasAVX2] in { - defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, bc_v32i8>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd, + defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, bc_v16i16>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, bc_v8i32>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq, + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, bc_v4i64>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, 
X86Punpckhbw, + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, bc_v32i8>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd, + defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, bc_v16i16>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, + defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, bc_v8i32>, VEX_4V; - defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq, + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, bc_v4i64>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, bc_v16i8>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, bc_v8i16>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, bc_v4i32>; - defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, bc_v2i64>; - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, bc_v16i8>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, bc_v8i16>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, bc_v4i32>; - defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, bc_v2i64>; } } // ExeDomain = SSEPackedInt +// Patterns for using AVX1 instructions with integer vectors +// Here to give AVX2 priority +let Predicates = [HasAVX] in { + def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; +} + // Splat v2f64 / v2i64 let AddedComplexity = 10 in { def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), @@ -4784,7 +4826,7 @@ def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // AVX 256-bit register conversion intrinsics def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)), +def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), (VCVTDQ2PDYrm addr:$src)>; def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), @@ -4794,7 +4836,7 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), def : Pat<(v4f64 
(sint_to_fp (v4i32 VR128:$src))), (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))), +def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), (VCVTDQ2PDYrm addr:$src)>; //===---------------------------------------------------------------------===// @@ -5085,7 +5127,7 @@ let Constraints = "$src1 = $dst" in { /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, - PatFrag mem_frag128, Intrinsic IntId128> { + Intrinsic IntId128> { def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -5097,12 +5139,12 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 - (bitconvert (mem_frag128 addr:$src))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src))))]>, OpSize; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, - PatFrag mem_frag256, Intrinsic IntId256> { + Intrinsic IntId256> { def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -5114,32 +5156,32 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 - (bitconvert (mem_frag256 addr:$src))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src))))]>, OpSize; } let Predicates = [HasAVX] in { - defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8, + defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128>, VEX; - defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16, + defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", int_x86_ssse3_pabs_w_128>, VEX; - defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32, + defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128>, VEX; } let Predicates = [HasAVX2] in { - defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8, + defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", int_x86_avx2_pabs_b>, VEX; - defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16, + defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", int_x86_avx2_pabs_w>, VEX; - defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32, + defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", int_x86_avx2_pabs_d>, VEX; } -defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8, +defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", int_x86_ssse3_pabs_b_128>; -defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16, +defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", int_x86_ssse3_pabs_w_128>; -defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32, +defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128>; //===---------------------------------------------------------------------===// @@ -5148,8 +5190,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32, /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, - PatFrag mem_frag128, Intrinsic IntId128, - bit Is2Addr = 1> { + Intrinsic IntId128, bit Is2Addr = 1> { let isCommutable = 1 in def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -5165,11 +5206,11 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (mem_frag128 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; } multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, - PatFrag mem_frag256, Intrinsic IntId256> { + Intrinsic IntId256> { let isCommutable = 1 in def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), @@ -5181,94 +5222,94 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntId256 VR256:$src1, - (bitconvert (mem_frag256 addr:$src2))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src2))))]>, OpSize; } let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16, + defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", int_x86_ssse3_phadd_w_128, 0>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32, + defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", int_x86_ssse3_phadd_d_128, 0>, VEX_4V; - defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16, + defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16, + defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", int_x86_ssse3_phsub_w_128, 0>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32, + defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", int_x86_ssse3_phsub_d_128, 0>, VEX_4V; - defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16, + defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; - defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8, + defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8, + defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8, + defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", int_x86_ssse3_psign_b_128, 0>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16, + defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", int_x86_ssse3_psign_w_128, 0>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32, + defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", int_x86_ssse3_psign_d_128, 0>, VEX_4V; } -defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16, +defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; } let ImmT = NoImm, Predicates = [HasAVX2] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16, + defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", int_x86_avx2_phadd_w>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32, + defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", int_x86_avx2_phadd_d>, VEX_4V; - defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16, + defm VPHADDSW : 
SS3I_binop_rm_int_y<0x03, "vphaddsw", int_x86_avx2_phadd_sw>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16, + defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", int_x86_avx2_phsub_w>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32, + defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", int_x86_avx2_phsub_d>, VEX_4V; - defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16, + defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw>, VEX_4V; - defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8, + defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", int_x86_avx2_pmadd_ub_sw>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8, + defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", int_x86_avx2_pshuf_b>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8, + defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16, + defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32, + defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d>, VEX_4V; } -defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16, +defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", int_x86_avx2_pmul_hr_sw>, VEX_4V; } // None of these have i8 immediate fields. let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16, + defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", int_x86_ssse3_phadd_w_128>; - defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32, + defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", int_x86_ssse3_phadd_d_128>; - defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16, + defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", int_x86_ssse3_phadd_sw_128>; - defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16, + defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", int_x86_ssse3_phsub_w_128>; - defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32, + defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", int_x86_ssse3_phsub_d_128>; - defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16, + defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", int_x86_ssse3_phsub_sw_128>; - defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8, + defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", int_x86_ssse3_pmadd_ub_sw_128>; - defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8, + defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", int_x86_ssse3_pshuf_b_128>; - defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8, + defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128>; - defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16, + defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128>; - defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32, + defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128>; } -defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16, +defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw_128>; } @@ -6017,8 +6058,18 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, Intrinsic F32Int, Intrinsic F64Int, bit Is2Addr = 1> { let ExeDomain = GenericDomain in { - // Intrinsic operation, reg. + // Operation, reg. 
def SSr : SS4AIi8<opcss, MRMSrcReg, + (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; + + // Intrinsic operation, reg. + def SSr_Int : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -6040,8 +6091,18 @@ let ExeDomain = GenericDomain in { (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, OpSize; - // Intrinsic operation, reg. + // Operation, reg. def SDr : SS4AIi8<opcsd, MRMSrcReg, + (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; + + // Intrinsic operation, reg. + def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -6079,6 +6140,27 @@ let Predicates = [HasAVX] in { defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", int_x86_sse41_round_ss, int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; + + def : Pat<(ffloor FR32:$src), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; + def : Pat<(f64 (ffloor FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; + def : Pat<(f32 (fnearbyint FR32:$src)), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; + def : Pat<(f64 (fnearbyint FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; + def : Pat<(f32 (fceil FR32:$src)), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; + def : Pat<(f64 (fceil FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; + def : Pat<(f32 (frint FR32:$src)), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; + def : Pat<(f64 (frint FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; + def : Pat<(f32 (ftrunc FR32:$src)), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; + def : Pat<(f64 (ftrunc FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -6088,6 +6170,27 @@ let Constraints = "$src1 = $dst" in defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", int_x86_sse41_round_ss, int_x86_sse41_round_sd>; +def : Pat<(ffloor FR32:$src), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; +def : Pat<(f64 (ffloor FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; +def : Pat<(f32 (fnearbyint FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; +def : Pat<(f64 (fnearbyint FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; +def : Pat<(f32 (fceil FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; +def : Pat<(f64 (fceil FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; +def : Pat<(f32 (frint FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; +def : Pat<(f64 (frint FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; +def : Pat<(f32 (ftrunc FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; +def : Pat<(f64 (ftrunc FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + //===----------------------------------------------------------------------===// // SSE4.1 - Packed Bit Test 
//===----------------------------------------------------------------------===// @@ -6195,7 +6298,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 - (bitconvert (memopv8i16 addr:$src))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src))))]>, OpSize; } let Predicates = [HasAVX] in @@ -6221,7 +6324,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; } /// SS41I_binop_rm_int - Simple SSE 4.1 binary operator @@ -6237,7 +6340,7 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntId256 VR256:$src1, - (bitconvert (memopv32i8 addr:$src2))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src2))))]>, OpSize; } let Predicates = [HasAVX] in { @@ -6400,38 +6503,38 @@ let Predicates = [HasAVX] in { let isCommutable = 0 in { let ExeDomain = SSEPackedSingle in { defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv4f32, i128mem, 0>, VEX_4V; defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", - int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V; } let ExeDomain = SSEPackedDouble in { defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv2f64, i128mem, 0>, VEX_4V; defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", - int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V; } defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv2i64, i128mem, 0>, VEX_4V; defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv2i64, i128mem, 0>, VEX_4V; } let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv4f32, i128mem, 0>, VEX_4V; let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv2f64, i128mem, 0>, VEX_4V; let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, - VR256, memopv32i8, i256mem, 0>, VEX_4V; + VR256, memopv8f32, i256mem, 0>, VEX_4V; } let Predicates = [HasAVX2] in { let isCommutable = 0 in { defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw, - VR256, memopv32i8, i256mem, 0>, VEX_4V; + VR256, memopv4i64, i256mem, 0>, VEX_4V; defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, - VR256, memopv32i8, i256mem, 0>, VEX_4V; + VR256, memopv4i64, i256mem, 0>, VEX_4V; } } @@ -6439,35 +6542,35 @@ let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { let ExeDomain = SSEPackedSingle in defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, - VR128, memopv16i8, i128mem>; + VR128, memopv4f32, i128mem>; let ExeDomain = SSEPackedDouble in defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", 
int_x86_sse41_blendpd, - VR128, memopv16i8, i128mem>; + VR128, memopv2f64, i128mem>; defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, - VR128, memopv16i8, i128mem>; + VR128, memopv2i64, i128mem>; defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, - VR128, memopv16i8, i128mem>; + VR128, memopv2i64, i128mem>; } let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, - VR128, memopv16i8, i128mem>; + VR128, memopv4f32, i128mem>; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, - VR128, memopv16i8, i128mem>; + VR128, memopv2f64, i128mem>; } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, PatFrag mem_frag, Intrinsic IntId> { - def rr : I<opc, MRMSrcReg, (outs RC:$dst), + def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; - def rm : I<opc, MRMSrcMem, (outs RC:$dst), + def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), @@ -6480,23 +6583,23 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX] in { let ExeDomain = SSEPackedDouble in { defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem, - memopv16i8, int_x86_sse41_blendvpd>; + memopv2f64, int_x86_sse41_blendvpd>; defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, - memopv32i8, int_x86_avx_blendv_pd_256>; + memopv4f64, int_x86_avx_blendv_pd_256>; } // ExeDomain = SSEPackedDouble let ExeDomain = SSEPackedSingle in { defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, - memopv16i8, int_x86_sse41_blendvps>; + memopv4f32, int_x86_sse41_blendvps>; defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, - memopv32i8, int_x86_avx_blendv_ps_256>; + memopv8f32, int_x86_avx_blendv_ps_256>; } // ExeDomain = SSEPackedSingle defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, - memopv16i8, int_x86_sse41_pblendvb>; + memopv2i64, int_x86_sse41_pblendvb>; } let Predicates = [HasAVX2] in { defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem, - memopv32i8, int_x86_avx2_pblendvb>; + memopv4i64, int_x86_avx2_pblendvb>; } let Predicates = [HasAVX] in { @@ -6537,7 +6640,8 @@ let Predicates = [HasAVX2] in { /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { - multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> { + multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag, + Intrinsic IntId> { def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, @@ -6551,15 +6655,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize; + (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize; } } let ExeDomain = SSEPackedDouble in -defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +defm BLENDVPD : 
SS41I_ternary_int<0x15, "blendvpd", memopv2f64, + int_x86_sse41_blendvpd>; let ExeDomain = SSEPackedSingle in -defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; -defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; +defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, + int_x86_sse41_blendvps>; +defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, + int_x86_sse41_pblendvb>; let Predicates = [HasSSE41] in { def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), @@ -6614,8 +6721,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; + (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize; } /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator @@ -6630,8 +6736,7 @@ multiclass SS42I_binop_rm_int_y<bits<8> opc, string OpcodeStr, (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (IntId256 VR256:$src1, - (bitconvert (memopv32i8 addr:$src2))))]>, OpSize; + (IntId256 VR256:$src1, (memopv4i64 addr:$src2)))]>, OpSize; } let Predicates = [HasAVX] in { @@ -6913,7 +7018,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; } // Perform One Round of an AES Encryption/Decryption Flow @@ -7144,7 +7249,7 @@ def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), //===----------------------------------------------------------------------===// // VINSERTF128 - Insert packed floating-point values // -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -7163,35 +7268,10 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; - 
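For context, the vinsertf128_insert patterns being reshuffled here select the 128-bit lane-insert instructions. A minimal C++ sketch of the operation these patterns cover, using the standard AVX intrinsics (illustrative only, not part of the patch; the function names are invented for the example, and it assumes a compiler invoked with -mavx or -mavx2):

#include <immintrin.h>

// Insert a 128-bit lane into the upper half of a 256-bit register.
// With AVX this is a single vinsertf128 with immediate 1.
__m256 insert_high_ps(__m256 acc, __m128 lane) {
  return _mm256_insertf128_ps(acc, lane, 1);
}

// For integer vectors the HasAVX2 patterns get first pick (vinserti128);
// without AVX2 the AVX1 patterns fall back to vinsertf128 on the same data.
__m256i insert_high_epi32(__m256i acc, __m128i lane) {
  return _mm256_insertf128_si256(acc, lane, 1);
}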
//===----------------------------------------------------------------------===// // VEXTRACTF128 - Extract packed floating-point values // -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), (ins VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -7210,31 +7290,6 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4f32 (VEXTRACTF128rr - (v8f32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2f64 (VEXTRACTF128rr - (v4f64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v8i16 (VEXTRACTF128rr - (v16i16 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v16i8 (VEXTRACTF128rr - (v32i8 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; - //===----------------------------------------------------------------------===// // VMASKMOV - Conditional SIMD Packed Loads and Stores // @@ -7288,7 +7343,8 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop_i:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V; + [(set RC:$dst, (IntVar RC:$src1, + (bitconvert (i_frag addr:$src2))))]>, VEX_4V; def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), @@ -7302,11 +7358,11 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, let ExeDomain = SSEPackedSingle in { defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, - memopv4f32, memopv4i32, + memopv4f32, memopv2i64, int_x86_avx_vpermilvar_ps, int_x86_avx_vpermil_ps>; defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - memopv8f32, memopv8i32, + memopv8f32, memopv4i64, int_x86_avx_vpermilvar_ps_256, int_x86_avx_vpermil_ps_256>; } @@ -7321,19 +7377,28 @@ let ExeDomain = SSEPackedDouble in { int_x86_avx_vpermil_pd_256>; } -def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), +def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; -def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), +def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPDYri VR256:$src1, imm:$imm)>; -def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), +def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; -def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), +def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPDYri VR256:$src1, imm:$imm)>; +def : Pat<(v8f32 (X86VPermilp (memopv8f32 addr:$src1), (i8 imm:$imm))), + (VPERMILPSYmi addr:$src1, imm:$imm)>; +def : Pat<(v4f64 
(X86VPermilp (memopv4f64 addr:$src1), (i8 imm:$imm))), + (VPERMILPDYmi addr:$src1, imm:$imm)>; +def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)), + (i8 imm:$imm))), + (VPERMILPSYmi addr:$src1, imm:$imm)>; +def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))), + (VPERMILPDYmi addr:$src1, imm:$imm)>; //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -7359,22 +7424,9 @@ def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, (memopv4f64 addr:$src2), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vperm2f128_si_256 - VR256:$src1, (memopv8i32 addr:$src2), imm:$src3), + VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; - //===----------------------------------------------------------------------===// // VZERO - Zero YMM registers // @@ -7451,9 +7503,9 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr, let isCommutable = 0 in { defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, - VR128, memopv16i8, i128mem>; + VR128, memopv2i64, i128mem>; defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, - VR256, memopv32i8, i256mem>; + VR256, memopv4i64, i256mem>; } //===----------------------------------------------------------------------===// @@ -7541,11 +7593,12 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int VR256:$src1, (mem_frag addr:$src2)))]>, + [(set VR256:$dst, (Int VR256:$src1, + (bitconvert (mem_frag addr:$src2))))]>, VEX_4V; } -defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>; +defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>; let ExeDomain = SSEPackedSingle in defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>; @@ -7571,7 +7624,7 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, VEX_W; //===----------------------------------------------------------------------===// -// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks +// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), @@ -7587,6 
+7640,64 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), imm:$src3))]>, VEX_4V; +let Predicates = [HasAVX2] in { +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; + +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)), + (i8 imm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), + (i8 imm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), + (i8 imm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; +} + +// AVX1 patterns +def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; + +def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, + (memopv8f32 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, + (memopv4i64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, + (memopv4f64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, + (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; + + //===----------------------------------------------------------------------===// // VINSERTI128 - Insert packed integer values // @@ -7603,6 +7714,51 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2), imm:$src3))]>, VEX_4V; +let Predicates = [HasAVX2] in { +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 
VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +} + +// AVX1 patterns +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; + //===----------------------------------------------------------------------===// // VEXTRACTI128 - Extract packed integer values // @@ -7617,6 +7773,51 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), (ins i128mem:$dst, VR256:$src1, i8imm:$src2), "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; +let Predicates = [HasAVX2] in { +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTI128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTI128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTI128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 (VEXTRACTI128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +} + +// AVX1 patterns +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4f32 (VEXTRACTF128rr + (v8f32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2f64 (VEXTRACTF128rr + (v4f64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTF128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTF128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTF128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 (VEXTRACTF128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; 
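The vextractf128_extract patterns above are the matching 128-bit lane extracts. A similar minimal sketch with the standard intrinsics (again illustrative only, not part of the patch; function names are assumptions):

#include <immintrin.h>

// Extract the upper 128-bit lane; selects vextractf128 with immediate 1 under AVX.
__m128 extract_high_ps(__m256 v) {
  return _mm256_extractf128_ps(v, 1);
}

// Integer vectors prefer the HasAVX2 patterns (vextracti128) when available;
// the AVX1 patterns reuse vextractf128 otherwise.
__m128i extract_high_epi32(__m256i v) {
  return _mm256_extractf128_si256(v, 1);
}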
+ //===----------------------------------------------------------------------===// // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores // diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td new file mode 100644 index 0000000..64cc44d --- /dev/null +++ b/lib/Target/X86/X86InstrXOP.td @@ -0,0 +1,243 @@ +//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes XOP (eXtended OPerations) +// +//===----------------------------------------------------------------------===// + +multiclass xop2op<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> { + def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>, VEX; + def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>, VEX; +} + +let isAsmParserOnly = 1 in { + defm VPHSUBWD : xop2op<0xE2, "vphsubwd", f128mem>; + defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", f128mem>; + defm VPHSUBBW : xop2op<0xE1, "vphsubbw", f128mem>; + defm VPHADDWQ : xop2op<0xC7, "vphaddwq", f128mem>; + defm VPHADDWD : xop2op<0xC6, "vphaddwd", f128mem>; + defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", f128mem>; + defm VPHADDUWD : xop2op<0xD6, "vphadduwd", f128mem>; + defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", f128mem>; + defm VPHADDUBW : xop2op<0xD1, "vphaddubw", f128mem>; + defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", f128mem>; + defm VPHADDUBD : xop2op<0xD2, "vphaddubd", f128mem>; + defm VPHADDDQ : xop2op<0xCB, "vphadddq", f128mem>; + defm VPHADDBW : xop2op<0xC1, "vphaddbw", f128mem>; + defm VPHADDBQ : xop2op<0xC3, "vphaddbq", f128mem>; + defm VPHADDBD : xop2op<0xC2, "vphaddbd", f128mem>; + defm VFRCZSS : xop2op<0x82, "vfrczss", f32mem>; + defm VFRCZSD : xop2op<0x83, "vfrczsd", f64mem>; + defm VFRCZPS : xop2op<0x80, "vfrczps", f128mem>; + defm VFRCZPD : xop2op<0x81, "vfrczpd", f128mem>; +} + +multiclass xop2op256<bits<8> opc, string OpcodeStr> { + def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>, VEX, VEX_L; + def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>, VEX; +} + +let isAsmParserOnly = 1 in { + defm VFRCZPS : xop2op256<0x80, "vfrczps">; + defm VFRCZPD : xop2op256<0x81, "vfrczpd">; +} + +multiclass xop3op<bits<8> opc, string OpcodeStr> { + def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4VOp3; + def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_W; + def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4VOp3; +} + +let isAsmParserOnly = 1 in { + defm VPSHLW : xop3op<0x95, "vpshlw">; + defm VPSHLQ : xop3op<0x97, "vpshlq">; + defm VPSHLD : xop3op<0x96, "vpshld">; + defm VPSHLB : xop3op<0x94, "vpshlb">; + defm VPSHAW : xop3op<0x99, "vpshaw">; + defm VPSHAQ : xop3op<0x9B, "vpshaq">; + defm VPSHAD : xop3op<0x9A, 
"vpshad">; + defm VPSHAB : xop3op<0x98, "vpshab">; + defm VPROTW : xop3op<0x91, "vprotw">; + defm VPROTQ : xop3op<0x93, "vprotq">; + defm VPROTD : xop3op<0x92, "vprotd">; + defm VPROTB : xop3op<0x90, "vprotb">; +} + +multiclass xop3opimm<bits<8> opc, string OpcodeStr> { + def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX; + def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX; +} + +let isAsmParserOnly = 1 in { + defm VPROTW : xop3opimm<0xC1, "vprotw">; + defm VPROTQ : xop3opimm<0xC3, "vprotq">; + defm VPROTD : xop3opimm<0xC2, "vprotd">; + defm VPROTB : xop3opimm<0xC0, "vprotb">; +} + +// Instruction where second source can be memory, but third must be register +multiclass xop4opm2<bits<8> opc, string OpcodeStr> { + def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; + def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; +} + +let isAsmParserOnly = 1 in { + defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd">; + defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd">; + defm VPMACSWW : xop4opm2<0x95, "vpmacsww">; + defm VPMACSWD : xop4opm2<0x96, "vpmacswd">; + defm VPMACSSWW : xop4opm2<0x85, "vpmacssww">; + defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd">; + defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql">; + defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh">; + defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd">; + defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql">; + defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh">; + defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd">; +} + +// Instruction where second source can be memory, third must be imm8 +multiclass xop4opimm<bits<8> opc, string OpcodeStr> { + def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V; + def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V; +} + +let isAsmParserOnly = 1 in { + defm VPCOMW : xop4opimm<0xCD, "vpcomw">; + defm VPCOMUW : xop4opimm<0xED, "vpcomuw">; + defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq">; + defm VPCOMUD : xop4opimm<0xEE, "vpcomud">; + defm VPCOMUB : xop4opimm<0xEC, "vpcomub">; + defm VPCOMQ : xop4opimm<0xCF, "vpcomq">; + defm VPCOMD : xop4opimm<0xCE, "vpcomd">; + defm VPCOMB : xop4opimm<0xCC, "vpcomb">; +} + +// Instruction where either second or third source can be memory +multiclass xop4op<bits<8> opc, string OpcodeStr> { + def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; + def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM, XOP_W; + def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, 
f128mem:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; +} + +let isAsmParserOnly = 1 in { + defm VPPERM : xop4op<0xA3, "vpperm">; + defm VPCMOV : xop4op<0xA2, "vpcmov">; +} + +multiclass xop4op256<bits<8> opc, string OpcodeStr> { + def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; + def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, f256mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM, XOP_W; + def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; +} + +let isAsmParserOnly = 1 in { + defm VPCMOV : xop4op256<0xA2, "vpcmov">; +} + +multiclass xop5op<bits<8> opc, string OpcodeStr> { + def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>; + def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>, XOP_W; + def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>; + def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>; + def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>, XOP_W; + def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>; +} + +let isAsmParserOnly = 1 in { + defm VPERMIL2PD : xop5op<0x49, "vpermil2pd">; + defm VPERMIL2PS : xop5op<0x48, "vpermil2ps">; +} diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 3f88fa6..2145a33 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -424,7 +424,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { TargetJITInfo::LazyResolverFn X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { + TsanIgnoreWritesBegin(); JITCompilerFunction = F; + TsanIgnoreWritesEnd(); #if defined (X86_32_JIT) && !defined (_MSC_VER) if (Subtarget->hasSSE1()) diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 81ee665..9232196 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -368,10 +368,6 @@ ReSimplify: case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; - case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, 
X86::PXORrr); break; - case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; - case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; - case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index c1ac9f3..4e80432 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -452,7 +452,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (RealignStack && + return (MF.getTarget().Options.RealignStack && !MFI->hasVarSizedObjects()); } @@ -583,7 +583,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // sure we restore the stack pointer immediately after the call, there may // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. MachineBasicBlock::iterator B = MBB.begin(); - while (I != B && !llvm::prior(I)->getDesc().isCall()) + while (I != B && !llvm::prior(I)->isCall()) --I; MBB.insert(I, New); } @@ -665,7 +665,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { case MVT::i8: if (High) { switch (Reg) { - default: return 0; + default: return getX86SubSuperRegister(Reg, MVT::i64, High); case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AH; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -785,6 +785,22 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { return X86::R15D; } case MVT::i64: + // For 64-bit mode if we've requested a "high" register and the + // Q or r constraints we want one of these high registers or + // just the register name otherwise. + if (High) { + switch (Reg) { + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; + // Fallthrough. 
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e7bcbf8..6e092c7 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -273,6 +273,8 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
     if (IsAMD && ((ECX >> 16) & 0x1)) {
       HasFMA4 = true;
       ToggleFeature(X86::FeatureFMA4);
+      HasXOP = true;
+      ToggleFeature(X86::FeatureXOP);
     }
   }
 }
@@ -317,6 +319,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
   , HasCLMUL(false)
   , HasFMA3(false)
   , HasFMA4(false)
+  , HasXOP(false)
   , HasMOVBE(false)
   , HasRDRAND(false)
   , HasF16C(false)
@@ -387,9 +390,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
   assert((!In64BitMode || HasX86_64) &&
          "64-bit code requested on a subtarget that doesn't support it!");
 
-  if(EnableSegmentedStacks && !isTargetELF())
-    report_fatal_error("Segmented stacks are only implemented on ELF.");
-
   // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
   // 32 and 64 bit) and for all 64-bit targets.
   if (StackAlignOverride)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index e93f8e9..ccb9be0 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -93,6 +93,9 @@ protected:
   /// HasFMA4 - Target has 4-operand fused multiply-add
   bool HasFMA4;
 
+  /// HasXOP - Target has XOP instructions
+  bool HasXOP;
+
   /// HasMOVBE - True if the processor has the MOVBE instruction.
   bool HasMOVBE;
 
@@ -198,6 +201,7 @@ public:
   bool hasCLMUL() const { return HasCLMUL; }
   bool hasFMA3() const { return HasFMA3; }
   bool hasFMA4() const { return HasFMA4; }
+  bool hasXOP() const { return HasXOP; }
   bool hasMOVBE() const { return HasMOVBE; }
   bool hasRDRAND() const { return HasRDRAND; }
   bool hasF16C() const { return HasF16C; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 1c9f3bd..126042e 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -31,9 +31,10 @@ extern "C" void LLVMInitializeX86Target() {
 
 X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
                                          StringRef CPU, StringRef FS,
+                                         const TargetOptions &Options,
                                          Reloc::Model RM, CodeModel::Model CM,
                                          CodeGenOpt::Level OL)
-  : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, false),
+  : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false),
    DataLayout(getSubtargetImpl()->isTargetDarwin() ?
               "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
               "n8:16:32-S128" :
@@ -52,9 +53,10 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
 
 X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
                                          StringRef CPU, StringRef FS,
+                                         const TargetOptions &Options,
                                          Reloc::Model RM, CodeModel::Model CM,
                                          CodeGenOpt::Level OL)
-  : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, true),
+  : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
    DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
               "n8:16:32:64-S128"),
    InstrInfo(*this),
@@ -67,11 +69,12 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
 ///
 X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
                                    StringRef CPU, StringRef FS,
+                                   const TargetOptions &Options,
                                    Reloc::Model RM, CodeModel::Model CM,
                                    CodeGenOpt::Level OL,
                                    bool is64Bit)
-  : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
-    Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
+  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
    FrameLowering(*this, Subtarget),
    ELFWriterInfo(is64Bit, true) {
   // Determine the PICStyle based on the target selected.
@@ -95,8 +98,11 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
   }
 
   // default to hard float ABI
-  if (FloatABIType == FloatABI::Default)
-    FloatABIType = FloatABI::Hard;
+  if (Options.FloatABIType == FloatABI::Default)
+    this->Options.FloatABIType = FloatABI::Hard;
+
+  if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF())
+    report_fatal_error("Segmented stacks are only implemented on ELF.");
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 64be458..3ac1769 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -38,7 +38,7 @@ class X86TargetMachine : public LLVMTargetMachine {
 
 public:
   X86TargetMachine(const Target &T, StringRef TT,
-                   StringRef CPU, StringRef FS,
+                   StringRef CPU, StringRef FS, const TargetOptions &Options,
                    Reloc::Model RM, CodeModel::Model CM,
                    CodeGenOpt::Level OL,
                    bool is64Bit);
@@ -85,7 +85,7 @@ class X86_32TargetMachine : public X86TargetMachine {
   X86JITInfo JITInfo;
 public:
   X86_32TargetMachine(const Target &T, StringRef TT,
-                      StringRef CPU, StringRef FS,
+                      StringRef CPU, StringRef FS, const TargetOptions &Options,
                      Reloc::Model RM, CodeModel::Model CM,
                      CodeGenOpt::Level OL);
   virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -113,7 +113,7 @@ class X86_64TargetMachine : public X86TargetMachine {
   X86JITInfo JITInfo;
 public:
   X86_64TargetMachine(const Target &T, StringRef TT,
-                      StringRef CPU, StringRef FS,
+                      StringRef CPU, StringRef FS, const TargetOptions &Options,
                      Reloc::Model RM, CodeModel::Model CM,
                      CodeGenOpt::Level OL);
   virtual const TargetData *getTargetData() const { return &DataLayout; }
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 9bb54a8..f8c30eb 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -220,7 +220,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
   for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
     MachineInstr *MI = I;
     DebugLoc dl = I->getDebugLoc();
-    bool isControlFlow = MI->getDesc().isCall() || MI->getDesc().isReturn();
+    bool isControlFlow = MI->isCall() || MI->isReturn();
 
     // Shortcut: don't need to check regular instructions in dirty state.
     if (!isControlFlow && CurState == ST_DIRTY)
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index d91da8c..de4abfc 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -21,17 +21,5 @@ add_llvm_target(XCoreCodeGen
   XCoreSelectionDAGInfo.cpp
   )
 
-add_llvm_library_dependencies(LLVMXCoreCodeGen
-  LLVMAsmPrinter
-  LLVMCodeGen
-  LLVMCore
-  LLVMMC
-  LLVMSelectionDAG
-  LLVMSupport
-  LLVMTarget
-  LLVMXCoreDesc
-  LLVMXCoreInfo
-  )
-
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt
index 1f7e2d5..53b4a9e 100644
--- a/lib/Target/XCore/LLVMBuild.txt
+++ b/lib/Target/XCore/LLVMBuild.txt
@@ -15,6 +15,9 @@
 ;
 ;===------------------------------------------------------------------------===;
 
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
 [component_0]
 type = TargetGroup
 name = XCore
@@ -27,4 +30,3 @@ name = XCoreCodeGen
 parent = XCore
 required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support Target XCoreDesc XCoreInfo
 add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
index 269822d..3a3f5b4 100644
--- a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,6 @@ add_llvm_library(LLVMXCoreDesc
   XCoreMCAsmInfo.cpp
   )
 
-add_llvm_library_dependencies(LLVMXCoreDesc
-  LLVMMC
-  LLVMXCoreInfo
-  )
-
 add_dependencies(LLVMXCoreDesc XCoreCommonTableGen)
 
 # Hack: we need to include 'main' target directory to grab private headers
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
index 628afb5..a80c939 100644
--- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = XCoreDesc
 parent = XCore
 required_libraries = MC XCoreInfo
 add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
index 7f84f69..2c34b87 100644
--- a/lib/Target/XCore/TargetInfo/CMakeLists.txt
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMXCoreInfo
   XCoreTargetInfo.cpp
   )
 
-add_llvm_library_dependencies(LLVMXCoreInfo
-  LLVMMC
-  LLVMSupport
-  LLVMTarget
-  )
-
 add_dependencies(LLVMXCoreInfo XCoreCommonTableGen)
diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
index d0b8e54..770ba87 100644
--- a/lib/Target/XCore/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = XCoreInfo
 parent = XCore
 required_libraries = MC Support Target
 add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index b8fb0ca..08f091e 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -24,7 +24,8 @@ namespace llvm {
   class XCoreTargetMachine;
   class formatted_raw_ostream;
 
-  FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+  FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
+                                   CodeGenOpt::Level OptLevel);
 
 } // end namespace llvm;
 
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 7f8b169..5007d04 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -84,7 +84,8 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
 }
 
 bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
-  return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+    MF.getFrameInfo()->hasVarSizedObjects();
 }
 
 void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 8d746ae..7564fba 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -41,8 +41,8 @@ namespace {
     const XCoreSubtarget &Subtarget;
 
   public:
-    XCoreDAGToDAGISel(XCoreTargetMachine &TM)
-      : SelectionDAGISel(TM),
+    XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel)
+      : SelectionDAGISel(TM, OptLevel),
        Lowering(*TM.getTargetLowering()),
        Subtarget(*TM.getSubtargetImpl()) { }
 
@@ -83,8 +83,9 @@ namespace {
 /// createXCoreISelDag - This pass converts a legalized DAG into a
 /// XCore-specific DAG, ready for instruction scheduling.
 ///
-FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
-  return new XCoreDAGToDAGISel(TM);
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM,
+                                       CodeGenOpt::Level OptLevel) {
+  return new XCoreDAGToDAGISel(TM, OptLevel);
 }
 
 bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index d791daa..c5c668e 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -109,6 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
   setOperationAction(ISD::ROTL , MVT::i32, Expand);
   setOperationAction(ISD::ROTR , MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
 
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index eec3674..7e1e035 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -21,9 +21,10 @@ using namespace llvm;
 ///
 XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
                                        StringRef CPU, StringRef FS,
+                                       const TargetOptions &Options,
                                        Reloc::Model RM, CodeModel::Model CM,
                                        CodeGenOpt::Level OL)
-  : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
    Subtarget(TT, CPU, FS),
    DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
               "i16:16:32-i32:32:32-i64:32:32-n32"),
@@ -34,7 +35,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
 }
 
 bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM) {
-  PM.add(createXCoreISelDag(*this));
+  PM.add(createXCoreISelDag(*this, getOptLevel()));
   return false;
 }
 
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 3f2644d..0159b1e 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -33,7 +33,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
   XCoreSelectionDAGInfo TSInfo;
 public:
   XCoreTargetMachine(const Target &T, StringRef TT,
-                     StringRef CPU, StringRef FS,
+                     StringRef CPU, StringRef FS, const TargetOptions &Options,
                      Reloc::Model RM, CodeModel::Model CM,
                      CodeGenOpt::Level OL);
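The two new Expand entries in XCoreISelLowering.cpp handle the CTTZ_ZERO_UNDEF/CTLZ_ZERO_UNDEF nodes that SelectionDAG forms when the source marks a zero input as undefined; expanding them simply falls back to the ordinary CTTZ/CTLZ lowering the target already has. Roughly, code like the following produces those nodes (illustrative; __builtin_ctz and __builtin_clz are undefined for 0, so Clang emits llvm.cttz/llvm.ctlz with the is_zero_undef flag set):

    // Becomes llvm.cttz.i32(x, /*is_zero_undef=*/true), i.e. the
    // ISD::CTTZ_ZERO_UNDEF node that XCore now marks Expand.
    unsigned trailingZeros(unsigned x) {
      return __builtin_ctz(x);   // undefined if x == 0
    }

    // Likewise llvm.ctlz.i32(x, true) maps to ISD::CTLZ_ZERO_UNDEF.
    unsigned leadingZeros(unsigned x) {
      return __builtin_clz(x);   // undefined if x == 0
    }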