Diffstat (limited to 'lib/Target')
131 files changed, 6355 insertions, 4012 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index cd3c0e0..69e2346 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -224,7 +224,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops,
 def : ProcNoItin<"cortex-m4", [HasV7Ops,
                                FeatureThumb2, FeatureNoARM, FeatureDB,
                                FeatureHWDiv, FeatureDSPThumb2,
-                               FeatureT2XtPk, FeatureVFP2,
+                               FeatureT2XtPk, FeatureVFP4,
                                FeatureVFPOnlySP, FeatureMClass]>;

//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 9a1ce06..e9e2803 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -529,10 +529,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
       return false;
     }
-    // These modifiers are not yet supported.
+    // This modifier is not yet supported.
     case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
-    case 'H': // The highest-numbered register of a pair.
       return true;
+    case 'H': { // The highest-numbered register of a pair.
+      const MachineOperand &MO = MI->getOperand(OpNum);
+      if (!MO.isReg())
+        return true;
+      const TargetRegisterClass &RC = ARM::GPRRegClass;
+      const MachineFunction &MF = *MI->getParent()->getParent();
+      const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+      unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
+      RegIdx |= 1; // The odd register is also the higher-numbered one of a pair.
+
+      unsigned Reg = RC.getRegister(RegIdx);
+      O << ARMInstPrinter::getRegisterName(Reg);
+      return false;
+    }
     }
 }
@@ -1136,8 +1150,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
       assert(SrcReg == ARM::SP &&
              "Only stack pointer as a source reg is supported");
       for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
-           i != NumOps; ++i)
-        RegList.push_back(MI->getOperand(i).getReg());
+           i != NumOps; ++i) {
+        const MachineOperand &MO = MI->getOperand(i);
+        // Actually, there should never be any impdef stuff here. Skip it
+        // temporarily to work around PR11902.
+        if (MO.isImplicit())
+          continue;
+        RegList.push_back(MO.getReg());
+      }
       break;
     case ARM::STR_PRE_IMM:
     case ARM::STR_PRE_REG:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 714238a..29033e5 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     } else
       llvm_unreachable("Unknown reg class!");
     break;
+  case 24:
+    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+      // Use aligned spills if the stack can be realigned.
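+      // (Illustrative note, not part of the original patch: a 24-byte
+      //  DTriple spill such as d0-d2 becomes a single aligned vst1.64 via
+      //  VST1d64TPseudo; the else-path below falls back to a vstmia of the
+      //  three D registers.)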
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) + .addFrameIndex(FI).addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + } + } else + llvm_unreachable("Unknown reg class!"); + break; case 32: - if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. @@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, } break; case ARM::VST1q64: + case ARM::VST1d64TPseudo: + case ARM::VST1d64QPseudo: if (MI->getOperand(0).isFI() && MI->getOperand(2).getSubReg() == 0) { FrameIndex = MI->getOperand(0).getIndex(); @@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else llvm_unreachable("Unknown reg class!"); break; - case 32: - if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + case 24: + if (ARM::DTripleRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) + .addFrameIndex(FI).addImm(16) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); + } + } else + llvm_unreachable("Unknown reg class!"); + break; + case 32: + if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) .addFrameIndex(FI).addImm(16) @@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, } break; case ARM::VLD1q64: + case ARM::VLD1d64TPseudo: + case ARM::VLD1d64QPseudo: if (MI->getOperand(1).isFI() && MI->getOperand(0).getSubReg() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -1524,6 +1568,139 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); } +/// Identify instructions that can be folded into a MOVCC instruction, and +/// return the corresponding opcode for the predicated pseudo-instruction. +static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, + const MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return 0; + if (!MRI.hasOneNonDBGUse(Reg)) + return 0; + MI = MRI.getVRegDef(Reg); + if (!MI) + return 0; + // Check if MI has any non-dead defs or physreg uses. This also detects + // predicated instructions which will be reading CPSR. 
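+  // (Illustrative example, not part of the original patch: a predicated
+  //  DefMI would read CPSR, a physreg, so the scan below rejects it. When
+  //  %t = ADDri %a, 1 has a single non-debug use in %r = MOVCCr %f, %t,
+  //  returning ARM::ADDCCri here lets optimizeSelect() fold the pair into
+  //  one predicated add.)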
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Reject frame index operands, PEI can't handle the predicated pseudos. + if (MO.isFI() || MO.isCPI() || MO.isJTI()) + return 0; + if (!MO.isReg()) + continue; + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + return 0; + if (MO.isDef() && !MO.isDead()) + return 0; + } + switch (MI->getOpcode()) { + default: return 0; + case ARM::ANDri: return ARM::ANDCCri; + case ARM::ANDrr: return ARM::ANDCCrr; + case ARM::ANDrsi: return ARM::ANDCCrsi; + case ARM::ANDrsr: return ARM::ANDCCrsr; + case ARM::t2ANDri: return ARM::t2ANDCCri; + case ARM::t2ANDrr: return ARM::t2ANDCCrr; + case ARM::t2ANDrs: return ARM::t2ANDCCrs; + case ARM::EORri: return ARM::EORCCri; + case ARM::EORrr: return ARM::EORCCrr; + case ARM::EORrsi: return ARM::EORCCrsi; + case ARM::EORrsr: return ARM::EORCCrsr; + case ARM::t2EORri: return ARM::t2EORCCri; + case ARM::t2EORrr: return ARM::t2EORCCrr; + case ARM::t2EORrs: return ARM::t2EORCCrs; + case ARM::ORRri: return ARM::ORRCCri; + case ARM::ORRrr: return ARM::ORRCCrr; + case ARM::ORRrsi: return ARM::ORRCCrsi; + case ARM::ORRrsr: return ARM::ORRCCrsr; + case ARM::t2ORRri: return ARM::t2ORRCCri; + case ARM::t2ORRrr: return ARM::t2ORRCCrr; + case ARM::t2ORRrs: return ARM::t2ORRCCrs; + + // ARM ADD/SUB + case ARM::ADDri: return ARM::ADDCCri; + case ARM::ADDrr: return ARM::ADDCCrr; + case ARM::ADDrsi: return ARM::ADDCCrsi; + case ARM::ADDrsr: return ARM::ADDCCrsr; + case ARM::SUBri: return ARM::SUBCCri; + case ARM::SUBrr: return ARM::SUBCCrr; + case ARM::SUBrsi: return ARM::SUBCCrsi; + case ARM::SUBrsr: return ARM::SUBCCrsr; + + // Thumb2 ADD/SUB + case ARM::t2ADDri: return ARM::t2ADDCCri; + case ARM::t2ADDri12: return ARM::t2ADDCCri12; + case ARM::t2ADDrr: return ARM::t2ADDCCrr; + case ARM::t2ADDrs: return ARM::t2ADDCCrs; + case ARM::t2SUBri: return ARM::t2SUBCCri; + case ARM::t2SUBri12: return ARM::t2SUBCCri12; + case ARM::t2SUBrr: return ARM::t2SUBCCrr; + case ARM::t2SUBrs: return ARM::t2SUBCCrs; + } +} + +bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const { + assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && + "Unknown select instruction"); + // MOVCC operands: + // 0: Def. + // 1: True use. + // 2: False use. + // 3: Condition code. + // 4: CPSR use. + TrueOp = 1; + FalseOp = 2; + Cond.push_back(MI->getOperand(3)); + Cond.push_back(MI->getOperand(4)); + // We can always fold a def. + Optimizable = true; + return false; +} + +MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, + bool PreferFalse) const { + assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && + "Unknown select instruction"); + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineInstr *DefMI = 0; + unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI); + bool Invert = !Opc; + if (!Opc) + Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI); + if (!Opc) + return 0; + + // Create a new predicated version of DefMI. + // Rfalse is the first use. + MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(Opc), MI->getOperand(0).getReg()) + .addOperand(MI->getOperand(Invert ? 2 : 1)); + + // Copy all the DefMI operands, excluding its (null) predicate. 
+ const MCInstrDesc &DefDesc = DefMI->getDesc(); + for (unsigned i = 1, e = DefDesc.getNumOperands(); + i != e && !DefDesc.OpInfo[i].isPredicate(); ++i) + NewMI.addOperand(DefMI->getOperand(i)); + + unsigned CondCode = MI->getOperand(3).getImm(); + if (Invert) + NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); + else + NewMI.addImm(CondCode); + NewMI.addOperand(MI->getOperand(4)); + + // DefMI is not the -S version that sets CPSR, so add an optional %noreg. + if (NewMI->hasOptionalDef()) + AddDefaultCC(NewMI); + + // The caller will erase MI, but not DefMI. + DefMI->eraseFromParent(); + return NewMI; +} + /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the /// instruction is encoded with an 'S' bit is determined by the optional CPSR /// def operand. @@ -3180,11 +3357,18 @@ enum ARMExeDomain { // std::pair<uint16_t, uint16_t> ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { - // VMOVD is a VFP instruction, but can be changed to NEON if it isn't - // predicated. + // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON + // if they are not predicated. if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); + // Cortex-A9 is particularly picky about mixing the two and wants these + // converted. + if (Subtarget.isCortexA9() && !isPredicated(MI) && + (MI->getOpcode() == ARM::VMOVRS || + MI->getOpcode() == ARM::VMOVSR)) + return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); + // No other instructions can be swizzled, so just determine their domain. unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; @@ -3204,22 +3388,95 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { - // We only know how to change VMOVD into VORR. - assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD"); - if (Domain != ExeNEON) - return; + unsigned DstReg, SrcReg, DReg; + unsigned Lane; + MachineInstrBuilder MIB(MI); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + bool isKill; + switch (MI->getOpcode()) { + default: + llvm_unreachable("cannot handle opcode!"); + break; + case ARM::VMOVD: + if (Domain != ExeNEON) + break; - // Zap the predicate operands. - assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - MI->RemoveOperand(3); - MI->RemoveOperand(2); + // Zap the predicate operands. + assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + MI->RemoveOperand(3); + MI->RemoveOperand(2); - // Change to a VORRd which requires two identical use operands. - MI->setDesc(get(ARM::VORRd)); + // Change to a VORRd which requires two identical use operands. + MI->setDesc(get(ARM::VORRd)); + + // Add the extra source operand and new predicates. + // This will go before any implicit ops. 
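+    // (Illustrative, not part of the original patch: this rewrites e.g.
+    //  'vmov d0, d1' into the NEON-domain equivalent 'vorr d0, d1, d1'.)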
+ AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); + break; + case ARM::VMOVRS: + if (Domain != ExeNEON) + break; + assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + if (DReg == ARM::NoRegister) { + DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass); + Lane = 1; + assert(DReg && "S-register with no D super-register?"); + } + + MI->RemoveOperand(3); + MI->RemoveOperand(2); + MI->RemoveOperand(1); + + MI->setDesc(get(ARM::VGETLNi32)); + MIB.addReg(DReg); + MIB.addImm(Lane); + + MIB->getOperand(1).setIsUndef(); + MIB.addReg(SrcReg, RegState::Implicit); + + AddDefaultPred(MIB); + break; + case ARM::VMOVSR: + if (Domain != ExeNEON) + break; + assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + if (DReg == ARM::NoRegister) { + DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass); + Lane = 1; + assert(DReg && "S-register with no D super-register?"); + } + isKill = MI->getOperand(0).isKill(); + + MI->RemoveOperand(3); + MI->RemoveOperand(2); + MI->RemoveOperand(1); + MI->RemoveOperand(0); + + MI->setDesc(get(ARM::VSETLNi32)); + MIB.addReg(DReg, RegState::Define); + MIB.addReg(DReg, RegState::Undef); + MIB.addReg(SrcReg); + MIB.addImm(Lane); + + if (isKill) + MIB->addRegisterKilled(DstReg, TRI, true); + MIB->addRegisterDefined(DstReg, TRI); + + AddDefaultPred(MIB); + break; + } - // Add the extra source operand and new predicates. - // This will go before any implicit ops. - AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); } bool ARMBaseInstrInfo::hasNOP() const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 1a10a4a..92e5ee8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -202,6 +202,13 @@ public: unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; + virtual bool analyzeSelect(const MachineInstr *MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const; + + virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const; + /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, @@ -352,6 +359,11 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg); int getMatchingCondBranchOpcode(int Opc); +/// Determine if MI can be folded into an ARM MOVCC instruction, and return the +/// opcode of the SSA instruction representing the conditional MI. +unsigned canFoldARMInstrIntoMOVCC(unsigned Reg, + MachineInstr *&MI, + const MachineRegisterInfo &MRI); /// Map pseudo instructions that imply an 'S' bit onto real opcodes. 
Whether /// the instruction is encoded with an 'S' bit is determined by the optional diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 231bd26..9deb96e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -62,8 +62,20 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + bool ghcCall = false; + + if (MF) { + const Function *F = MF->getFunction(); + ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); + } + + if (ghcCall) { + return CSR_GHC_SaveList; + } + else { return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + } } const uint32_t* diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index b9a2512..bda1517 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[ CCDelegateTo<RetCC_ARM_APCS> ]>; +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention for GHC +//===----------------------------------------------------------------------===// + +def CC_ARM_APCS_GHC : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>, + CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>, + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim + CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>> +]>; //===----------------------------------------------------------------------===// // ARM AAPCS (EABI) Calling Convention, common parts @@ -113,6 +132,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCPassByVal<4, 4>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, @@ -138,6 +160,9 @@ def RetCC_ARM_AAPCS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS_VFP : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCPassByVal<4, 4>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, @@ -171,3 +196,9 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. 
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+
+// The GHC set of callee-saved regs is empty, as all those regs are
+// used for passing STG regs around.
+// 'add' is a workaround for not being able to compile an empty list:
+// def CSR_GHC : CalleeSavedRegs<()>;
+def CSR_GHC : CalleeSavedRegs<(add)>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index af260a5..132b81f 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -264,7 +264,7 @@ namespace {
         emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
         return 0;
       }
-      unsigned Reg = getARMRegisterNumbering(MO.getReg());
+      unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
       int32_t Imm12 = MO1.getImm();
       uint32_t Binary;
       Binary = Imm12 & 0xfff;
@@ -314,18 +314,24 @@ namespace {
      // {7-0}  = imm8
      uint32_t Binary = 0;
      const MachineOperand &MO  = MI.getOperand(Op);
-     uint32_t Reg = getMachineOpValue(MI, MO);
-     Binary |= (Reg << 9);
-
-     // If there is a non-zero immediate offset, encode it.
-     if (MO.isReg()) {
-       const MachineOperand &MO1 = MI.getOperand(Op + 1);
-       if (uint32_t ImmOffs = ARM_AM::getAM5Offset(MO1.getImm())) {
-         if (ARM_AM::getAM5Op(MO1.getImm()) == ARM_AM::add)
-           Binary |= 1 << 8;
-         Binary |= ImmOffs & 0xff;
-         return Binary;
-       }
+     const MachineOperand &MO1 = MI.getOperand(Op + 1);
+     if (!MO.isReg()) {
+       emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+       return 0;
+     }
+     unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
+     int32_t Imm12 = MO1.getImm();
+
+     // Special value for #-0
+     if (Imm12 == INT32_MIN)
+       Imm12 = 0;
+
+     // Immediate is always encoded as positive. The 'U' bit controls add vs
+     // sub.
+     bool isAdd = true;
+     if (Imm12 < 0) {
+       Imm12 = -Imm12;
+       isAdd = false;
      }

      // If immediate offset is omitted, default to +0.
@@ -367,6 +373,12 @@ namespace {
     void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
     void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
                                intptr_t JTBase = 0) const;
+    unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const;
   };
 }
@@ -455,7 +467,7 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
 unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                            const MachineOperand &MO) const {
   if (MO.isReg())
-    return getARMRegisterNumbering(MO.getReg());
+    return II->getRegisterInfo().getEncodingValue(MO.getReg());
   else if (MO.isImm())
     return static_cast<unsigned>(MO.getImm());
   else if (MO.isFPImm())
@@ -816,7 +828,7 @@ void ARMCodeEmitter::emitLEApcrelInstruction(const MachineInstr &MI) {
   Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;

   // Encode Rn which is PC.
-  Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+  Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;

   // Encode the displacement which is a so_imm.
   // Set bit I(25) to identify this is the immediate form of <shifter_op>
@@ -844,7 +856,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
   Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;

   // Encode Rn which is PC.
- Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; // Encode the displacement. Binary |= 1 << ARMII::I_BitShift; @@ -1045,7 +1057,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, if (Rs) { // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); } // Encode shift_imm bit[11:7]. @@ -1101,7 +1113,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; else if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); if (MCID.Opcode == ARM::MOVi16) { // Get immediate from MI. @@ -1151,7 +1163,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (!isUnary) { if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else { Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; ++OpIdx; @@ -1168,7 +1180,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (MO.isReg()) { // Encode register Rm. - emitWordLE(Binary | getARMRegisterNumbering(MO.getReg())); + emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg())); return; } @@ -1217,14 +1229,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, // Set first operand if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -1251,7 +1263,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, Binary |= 1 << ARMII::I_BitShift; assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); // Set bit[3:0] to the corresponding Rm register - Binary |= getARMRegisterNumbering(MO2.getReg()); + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); // If this instr is in scaled register offset/index instruction, set // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. @@ -1295,7 +1307,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -1314,7 +1326,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // If this instr is in register offset/index encoding, set bit[3:0] // to the corresponding Rm register. 
if (MO2.getReg()) { - Binary |= getARMRegisterNumbering(MO2.getReg()); + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); emitWordLE(Binary); return; } @@ -1385,7 +1397,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) break; - unsigned RegNum = getARMRegisterNumbering(MO.getReg()); + unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg()); assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && RegNum < 16); Binary |= 0x1 << RegNum; @@ -1632,7 +1644,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) // The return register is LR. - Binary |= getARMRegisterNumbering(ARM::LR); + Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR); else // otherwise, set the return register Binary |= getMachineOpValue(MI, 0); @@ -1640,11 +1652,12 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = ARM::SPRRegClass.contains(RegD); - RegD = getARMRegisterNumbering(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); if (!isSPVFP) { Binary |= (RegD & 0x0F) << ARMII::RegRdShift; Binary |= ((RegD & 0x10) >> 4) << ARMII::D_BitShift; @@ -1655,11 +1668,12 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = ARM::SPRRegClass.contains(RegN); - RegN = getARMRegisterNumbering(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); if (!isSPVFP) { Binary |= (RegN & 0x0F) << ARMII::RegRnShift; Binary |= ((RegN & 0x10) >> 4) << ARMII::N_BitShift; @@ -1670,11 +1684,12 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = ARM::SPRRegClass.contains(RegM); - RegM = getARMRegisterNumbering(RegM); + RegM = II->getRegisterInfo().getEncodingValue(RegM); if (!isSPVFP) { Binary |= (RegM & 0x0F); Binary |= ((RegM & 0x10) >> 4) << ARMII::M_BitShift; @@ -1885,28 +1900,31 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegD = getARMRegisterNumbering(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); Binary |= (RegD & 0xf) << ARMII::RegRdShift; Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; return Binary; } -static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegN = 
getARMRegisterNumbering(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); Binary |= (RegN & 0xf) << ARMII::RegRnShift; Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; return Binary; } -static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegM = getARMRegisterNumbering(RegM); + RegM = II->getRegisterInfo().getEncodingValue(RegM); Binary |= (RegM & 0xf); Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; return Binary; @@ -1940,7 +1958,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); - RegT = getARMRegisterNumbering(RegT); + RegT = II->getRegisterInfo().getEncodingValue(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, RegNOpIdx); @@ -1969,7 +1987,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(1).getReg(); - RegT = getARMRegisterNumbering(RegT); + RegT = II->getRegisterInfo().getEncodingValue(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, 0); emitWordLE(Binary); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index a242b13..15bb32e 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1009,7 +1009,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; + unsigned Lane = TRI->getEncodingValue(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index b96395f..5a5ca1b 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -87,8 +87,9 @@ class ARMFastISel : public FastISel { LLVMContext *Context; public: - explicit ARMFastISel(FunctionLoweringInfo &funcInfo) - : FastISel(funcInfo), + explicit ARMFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) + : FastISel(funcInfo, libInfo), TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()) { @@ -99,51 +100,53 @@ class ARMFastISel : public FastISel { } // Code from FastISel.cpp. 
- virtual unsigned FastEmitInst_(unsigned MachineInstOpcode, - const TargetRegisterClass *RC); - virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill); - virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill); - virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - unsigned Op2, bool Op2IsKill); - virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm); - virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm); - virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm1, uint64_t Imm2); - - virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, bool Op0IsKill, - uint32_t Idx); + private: + unsigned FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC); + unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill); + unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + unsigned FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm); + unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm); + unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + unsigned FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2); + + unsigned FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx); // Backend specific FastISel code. + private: virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); - + private: #include "ARMGenFastISel.inc" // Instruction selection routines. @@ -167,6 +170,7 @@ class ARMFastISel : public FastISel { bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); + bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy); // Utility routines. 
 private:
@@ -1819,9 +1823,12 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   default:
     llvm_unreachable("Unsupported calling convention");
   case CallingConv::Fast:
-    // Ignore fastcc. Silence compiler warnings.
-    (void)RetFastCC_ARM_APCS;
-    (void)FastCC_ARM_APCS;
+    if (Subtarget->hasVFP2() && !isVarArg) {
+      if (!Subtarget->isAAPCS_ABI())
+        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+      // For AAPCS ABI targets, just use the VFP variant of the calling
+      // convention.
+      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+    }
     // Fallthrough
   case CallingConv::C:
     // Use target triple & subtarget features to do actual dispatch.
@@ -1842,6 +1849,11 @@
     return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
   case CallingConv::ARM_APCS:
     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+  case CallingConv::GHC:
+    if (Return)
+      llvm_unreachable("Can't return in GHC call convention");
+    else
+      return CC_ARM_APCS_GHC;
   }
 }
@@ -2608,6 +2620,61 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
   return true;
 }

+bool ARMFastISel::SelectShift(const Instruction *I,
+                              ARM_AM::ShiftOpc ShiftTy) {
+  // We handle Thumb2 mode through the target-independent selector
+  // or SelectionDAG ISel.
+  if (isThumb2)
+    return false;
+
+  // Only handle i32 now.
+  EVT DestVT = TLI.getValueType(I->getType(), true);
+  if (DestVT != MVT::i32)
+    return false;
+
+  unsigned Opc = ARM::MOVsr;
+  unsigned ShiftImm;
+  Value *Src2Value = I->getOperand(1);
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
+    ShiftImm = CI->getZExtValue();
+
+    // Fall back to selection DAG isel if the shift amount
+    // is zero or greater than the width of the value type.
+    if (ShiftImm == 0 || ShiftImm >= 32)
+      return false;
+
+    Opc = ARM::MOVsi;
+  }
+
+  Value *Src1Value = I->getOperand(0);
+  unsigned Reg1 = getRegForValue(Src1Value);
+  if (Reg1 == 0) return false;
+
+  unsigned Reg2;
+  if (Opc == ARM::MOVsr) {
+    Reg2 = getRegForValue(Src2Value);
+    if (Reg2 == 0) return false;
+  }
+
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+  if (ResultReg == 0) return false;
+
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                    TII.get(Opc), ResultReg)
+                            .addReg(Reg1);
+
+  if (Opc == ARM::MOVsi)
+    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
+  else if (Opc == ARM::MOVsr) {
+    MIB.addReg(Reg2);
+    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
+  }
+
+  AddOptionalDefs(MIB);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
 // TODO: SoftFP support.
 bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
@@ -2668,6 +2735,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
   case Instruction::ZExt:
   case Instruction::SExt:
     return SelectIntExt(I);
+  case Instruction::Shl:
+    return SelectShift(I, ARM_AM::lsl);
+  case Instruction::LShr:
+    return SelectShift(I, ARM_AM::lsr);
+  case Instruction::AShr:
+    return SelectShift(I, ARM_AM::asr);
   default: break;
   }
   return false;
@@ -2720,14 +2793,15 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
 }

 namespace llvm {
-  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
+                                const TargetLibraryInfo *libInfo) {
     // Completely untested on non-iOS.
     const TargetMachine &TM = funcInfo.MF->getTarget();

     // Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) - return new ARMFastISel(funcInfo); + return new ARMFastISel(funcInfo, libInfo); return 0; } } diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2629496..aee72d2 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -15,6 +15,8 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" +#include "llvm/CallingConv.h" +#include "llvm/Function.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -151,6 +153,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; int D8SpillFI = 0; + // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, @@ -354,6 +360,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); + // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 1953192..c6f9d15 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -47,11 +47,6 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), cl::init(true)); -static cl::opt<bool> -DisableARMIntABS("disable-arm-int-abs", cl::Hidden, - cl::desc("Enable / disable ARM integer abs transform"), - cl::init(false)); - //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -244,7 +239,6 @@ private: /// SelectCMOVOp - Select CMOV instructions for ARM. 
SDNode *SelectCMOVOp(SDNode *N); - SDNode *SelectConditionalOp(SDNode *N); SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -2368,115 +2362,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } -SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) { - SDValue FalseVal = N->getOperand(0); - SDValue TrueVal = N->getOperand(1); - ARMCC::CondCodes CCVal = - (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); - SDValue CCR = N->getOperand(3); - assert(CCR.getOpcode() == ISD::Register); - SDValue InFlag = N->getOperand(4); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - - if (Subtarget->isThumb()) { - SDValue CPTmp0; - SDValue CPTmp1; - if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break; - case ARMISD::COR: Opc = ARM::t2ORRCCrs; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break; - } - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); - } - - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (T) { - unsigned TrueImm = T->getZExtValue(); - if (is_t2_so_imm(TrueImm)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCri; break; - case ARMISD::COR: Opc = ARM::t2ORRCCri; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCri; break; - } - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); - } - } - - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break; - case ARMISD::COR: Opc = ARM::t2ORRCCrr; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break; - } - SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); - } - - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrsi; break; - case ARMISD::COR: Opc = ARM::ORRCCrsi; break; - case ARMISD::CXOR: Opc = ARM::EORCCrsi; break; - } - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); - } - - if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrsr; break; - case ARMISD::COR: Opc = ARM::ORRCCrsr; break; - case ARMISD::CXOR: Opc = ARM::EORCCrsr; break; - } - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8); - } - - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (T) { - unsigned TrueImm = T->getZExtValue(); - if (is_so_imm(TrueImm)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCri; break; - case ARMISD::COR: Opc = ARM::ORRCCri; break; - case ARMISD::CXOR: Opc = ARM::EORCCri; 
break; - } - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); - } - } - - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrr; break; - case ARMISD::COR: Opc = ARM::ORRCCrr; break; - case ARMISD::CXOR: Opc = ARM::EORCCrr; break; - } - SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); -} - /// Target-specific DAG combining for ISD::XOR. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2492,14 +2377,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ SDValue XORSrc1 = N->getOperand(1); EVT VT = N->getValueType(0); - if (DisableARMIntABS) - return NULL; - if (Subtarget->isThumb1Only()) return NULL; - if (XORSrc0.getOpcode() != ISD::ADD || - XORSrc1.getOpcode() != ISD::SRA) + if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) return NULL; SDValue ADDSrc0 = XORSrc0.getOperand(0); @@ -2510,16 +2391,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ EVT XType = SRASrc0.getValueType(); unsigned Size = XType.getSizeInBits() - 1; - if (ADDSrc1 == XORSrc1 && - ADDSrc0 == SRASrc0 && - XType.isInteger() && - SRAConstant != NULL && + if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && + XType.isInteger() && SRAConstant != NULL && Size == SRAConstant->getZExtValue()) { - - unsigned Opcode = ARM::ABS; - if (Subtarget->isThumb2()) - Opcode = ARM::t2ABS; - + unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); } @@ -2814,10 +2689,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); - case ARMISD::CAND: - case ARMISD::COR: - case ARMISD::CXOR: - return SelectConditionalOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 04370c0..df4039b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -90,75 +90,70 @@ static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; -void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, - EVT PromotedBitwiseVT) { +void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, + MVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::LOAD, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::STORE, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::STORE, VT, Promote); + AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); } - EVT ElemTy = VT.getVectorElementType(); + MVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) - setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); if (ElemTy == MVT::i32) { - 
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom); - setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SINT_TO_FP, VT, Custom); + setOperationAction(ISD::UINT_TO_FP, VT, Custom); + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); } else { - setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand); - } - setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); - setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SINT_TO_FP, VT, Expand); + setOperationAction(ISD::UINT_TO_FP, VT, Expand); + setOperationAction(ISD::FP_TO_SINT, VT, Expand); + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + } + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { - setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); } // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { - setOperationAction(ISD::AND, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::AND, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); - setOperationAction(ISD::OR, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::OR, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); - setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::XOR, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); } // Neon does not support vector divide/remainder operations. 
- setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); } -void ARMTargetLowering::addDRTypeForNEON(EVT VT) { +void ARMTargetLowering::addDRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPRRegClass); addTypeForNEON(VT, MVT::f64, MVT::v2i32); } -void ARMTargetLowering::addQRTypeForNEON(EVT VT) { +void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::QPRRegClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } @@ -903,9 +898,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::CAND: return "ARMISD::CAND"; - case ARMISD::COR: return "ARMISD::COR"; - case ARMISD::CXOR: return "ARMISD::CXOR"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -1041,8 +1033,9 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { // Create a fast isel object. FastISel * -ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { - return ARM::createFastISel(funcInfo); +ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return ARM::createFastISel(funcInfo, libInfo); } /// getMaximalGlobalOffset - Returns the maximal possible offset which can @@ -1171,6 +1164,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::GHC: + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } } @@ -4271,6 +4266,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // Record this extraction against the appropriate vector if possible... SDValue SourceVec = V.getOperand(0); + // If the element number isn't a constant, we can't effectively + // analyze what's going on. + if (!isa<ConstantSDNode>(V.getOperand(1))) + return SDValue(); unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); bool FoundSource = false; for (unsigned j = 0; j < SourceVecs.size(); ++j) { @@ -6152,13 +6151,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // Add the jump table entries as successors to the MBB. - MachineBasicBlock *PrevMBB = 0; + SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; for (std::vector<MachineBasicBlock*>::iterator I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { MachineBasicBlock *CurMBB = *I; - if (PrevMBB != CurMBB) + if (SeenMBBs.insert(CurMBB)) DispContBB->addSuccessor(CurMBB); - PrevMBB = CurMBB; } // N.B. the order the invoke BBs are processed in doesn't matter here. @@ -6971,62 +6969,137 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // ARM Optimization Hooks //===----------------------------------------------------------------------===// +// Helper function that checks if N is a null or all ones constant. 
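+// (Illustrative, not part of the original patch: with AllOnes=false this
+//  matches the constant 0; with AllOnes=true it matches -1, i.e. a constant
+//  with all bits set.)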
+static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + if (!C) + return false; + return AllOnes ? C->isAllOnesValue() : C->isNullValue(); +} + +// Return true if N is conditionally 0 or all ones. +// Detects these expressions where cc is an i1 value: +// +// (select cc 0, y) [AllOnes=0] +// (select cc y, 0) [AllOnes=0] +// (zext cc) [AllOnes=0] +// (sext cc) [AllOnes=0/1] +// (select cc -1, y) [AllOnes=1] +// (select cc y, -1) [AllOnes=1] +// +// Invert is set when N is the null/all ones constant when CC is false. +// OtherOp is set to the alternative value of N. +static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, + SDValue &CC, bool &Invert, + SDValue &OtherOp, + SelectionDAG &DAG) { + switch (N->getOpcode()) { + default: return false; + case ISD::SELECT: { + CC = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + if (isZeroOrAllOnes(N1, AllOnes)) { + Invert = false; + OtherOp = N2; + return true; + } + if (isZeroOrAllOnes(N2, AllOnes)) { + Invert = true; + OtherOp = N1; + return true; + } + return false; + } + case ISD::ZERO_EXTEND: + // (zext cc) can never be the all ones value. + if (AllOnes) + return false; + // Fall through. + case ISD::SIGN_EXTEND: { + EVT VT = N->getValueType(0); + CC = N->getOperand(0); + if (CC.getValueType() != MVT::i1) + return false; + Invert = !AllOnes; + if (AllOnes) + // When looking for an AllOnes constant, N is an sext, and the 'other' + // value is 0. + OtherOp = DAG.getConstant(0, VT); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + // When looking for a 0 constant, N can be zext or sext. + OtherOp = DAG.getConstant(1, VT); + else + OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + return true; + } + } +} + +// Combine a constant select operand into its use: +// +// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) +// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) +// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] +// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) +// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) +// +// The transform is rejected if the select doesn't have a constant operand that +// is null, or all ones when AllOnes is set. +// +// Also recognize sext/zext from i1: +// +// (add (zext cc), x) -> (select cc (add x, 1), x) +// (add (sext cc), x) -> (select cc (add x, -1), x) +// +// These transformations eventually create predicated instructions. +// +// @param N The node to transform. +// @param Slct The N operand that is a select. +// @param OtherOp The other N operand (x above). +// @param DCI Context. +// @param AllOnes Require the select constant to be all ones instead of null. +// @returns The new node, or SDValue() on failure. static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + bool AllOnes = false) { SelectionDAG &DAG = DCI.DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = N->getValueType(0); - unsigned Opc = N->getOpcode(); - bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; - SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); - SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2);
-  ISD::CondCode CC = ISD::SETCC_INVALID;
-
-  if (isSlctCC) {
-    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
-  } else {
-    SDValue CCOp = Slct.getOperand(0);
-    if (CCOp.getOpcode() == ISD::SETCC)
-      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
-  }
-
-  bool DoXform = false;
-  bool InvCC = false;
-  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
-          "Bad input!");
-
-  if (LHS.getOpcode() == ISD::Constant &&
-      cast<ConstantSDNode>(LHS)->isNullValue()) {
-    DoXform = true;
-  } else if (CC != ISD::SETCC_INVALID &&
-             RHS.getOpcode() == ISD::Constant &&
-             cast<ConstantSDNode>(RHS)->isNullValue()) {
-    std::swap(LHS, RHS);
-    SDValue Op0 = Slct.getOperand(0);
-    EVT OpVT = isSlctCC ? Op0.getValueType() :
-                          Op0.getOperand(0).getValueType();
-    bool isInt = OpVT.isInteger();
-    CC = ISD::getSetCCInverse(CC, isInt);
-
-    if (!TLI.isCondCodeLegal(CC, OpVT))
-      return SDValue();         // Inverse operator isn't legal.
-
-    DoXform = true;
-    InvCC = true;
-  }
-
-  if (DoXform) {
-    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
-    if (isSlctCC)
-      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
-                             Slct.getOperand(0), Slct.getOperand(1), CC);
-    SDValue CCOp = Slct.getOperand(0);
-    if (InvCC)
-      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
-                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
-    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
-                       CCOp, OtherOp, Result);
+  SDValue NonConstantVal;
+  SDValue CCOp;
+  bool SwapSelectOps;
+  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
+                                  NonConstantVal, DAG))
+    return SDValue();
+
+  // Slct is now known to be the desired identity constant when CC is true.
+  SDValue TrueVal = OtherOp;
+  SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+                                 OtherOp, NonConstantVal);
+  // Unless SwapSelectOps says CC should be false.
+  if (SwapSelectOps)
+    std::swap(TrueVal, FalseVal);
+
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+                     CCOp, TrueVal, FalseVal);
+}
+
+// Attempt combineSelectAndUse on each operand of a commutative operator N. 
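Before the commutative wrapper that the comment above introduces, it is worth checking that the rewrites combineSelectAndUse performs really are value-preserving. The following is a standalone C++ illustration, not code from the patch; sel is a stand-in for ISD::SELECT and all names here are mine:

// Value-level form of the folds:
//   (add (select cc, 0, c), x)  == (select cc, x, (add x, c))
//   (and (select cc, -1, c), x) == (select cc, x, (and x, c))   [AllOnes=1]
//   (xor (select cc, 0, c), x)  == (select cc, x, (xor x, c))
#include <cassert>
#include <cstdint>

static uint32_t sel(bool cc, uint32_t t, uint32_t f) { return cc ? t : f; }

int main() {
  for (int cc = 0; cc <= 1; ++cc)
    for (uint32_t x = 0; x < 8; ++x)
      for (uint32_t c = 0; c < 8; ++c) {
        assert(sel(cc, 0, c) + x == sel(cc, x, x + c));
        assert((sel(cc, ~0u, c) & x) == sel(cc, x, x & c));
        assert((sel(cc, 0, c) ^ x) == sel(cc, x, x ^ c));
        // (zext i1 cc) and (sext i1 cc) are the select special cases:
        assert(uint32_t(cc) == sel(cc, 1, 0));
        assert(uint32_t(-int32_t(cc)) == sel(cc, ~0u, 0));
      }
  return 0;
}

Once rewritten into select form, these match the MOVCC pseudos during ISel, which is how the transformations eventually produce predicated instructions, as the comment block above notes.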
+static +SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes); + if (Result.getNode()) + return Result; + } + if (N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes); + if (Result.getNode()) + return Result; } return SDValue(); } @@ -7134,7 +7207,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, return Result; // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) - if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { + if (N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); if (Result.getNode()) return Result; } @@ -7166,7 +7239,7 @@ static SDValue PerformSUBCombine(SDNode *N, SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) - if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + if (N1.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N1, N0, DCI); if (Result.getNode()) return Result; } @@ -7294,49 +7367,6 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } -static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) { - if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse()) - return false; - - SDValue FalseVal = N.getOperand(0); - ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal); - if (!C) - return false; - if (AllOnes) - return C->isAllOnesValue(); - return C->isNullValue(); -} - -/// formConditionalOp - Combine an operation with a conditional move operand -/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y) -/// (and x, (cmov -1, y, cond)) => (and.cond, x, y) -static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG, - bool Commutable) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - bool isAND = N->getOpcode() == ISD::AND; - bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND); - if (!isCand && Commutable) { - isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND); - if (isCand) - std::swap(N0, N1); - } - if (!isCand) - return SDValue(); - - unsigned Opc = 0; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ISD::AND: Opc = ARMISD::CAND; break; - case ISD::OR: Opc = ARMISD::COR; break; - case ISD::XOR: Opc = ARMISD::CXOR; break; - } - return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0, - N1.getOperand(1), N1.getOperand(2), N1.getOperand(3), - N1.getOperand(4)); -} - static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -7371,10 +7401,10 @@ static SDValue PerformANDCombine(SDNode *N, } if (!Subtarget->isThumb1Only()) { - // (and x, (cmov -1, y, cond)) => (and.cond x, y) - SDValue CAND = formConditionalOp(N, DAG, true); - if (CAND.getNode()) - return CAND; + // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, true, DCI); + if (Result.getNode()) + return Result; } return SDValue(); @@ -7414,14 +7444,17 @@ static SDValue PerformORCombine(SDNode *N, } if (!Subtarget->isThumb1Only()) { - // (or x, (cmov 0, y, cond)) => (or.cond x, y) - SDValue COR = formConditionalOp(N, DAG, true); - if (COR.getNode()) - return COR; + // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) + SDValue Result = 
combineSelectAndUseCommutative(N, false, DCI); + if (Result.getNode()) + return Result; } + // The code below optimizes (or (and X, Y), Z). + // The AND operand needs to have a single user to make these optimizations + // profitable. SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND) + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) return SDValue(); SDValue N1 = N->getOperand(1); @@ -7578,10 +7611,10 @@ static SDValue PerformXORCombine(SDNode *N, return SDValue(); if (!Subtarget->isThumb1Only()) { - // (xor x, (cmov 0, y, cond)) => (xor.cond x, y) - SDValue CXOR = formConditionalOp(N, DAG, true); - if (CXOR.getNode()) - return CXOR; + // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, false, DCI); + if (Result.getNode()) + return Result; } return SDValue(); @@ -8802,6 +8835,8 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { case MVT::i16: case MVT::i32: return true; + case MVT::f64: + return Subtarget->hasNEON(); // FIXME: VLD1 etc with standard alignment is legal. } } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 7ad48b9..13b83de 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -63,9 +63,6 @@ namespace llvm { FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. - CAND, // ARM conditional and instructions. - COR, // ARM conditional or instructions. - CXOR, // ARM conditional xor instructions. BCC_i64, @@ -361,7 +358,8 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const; Sched::Preference getSchedulingPreference(SDNode *N) const; @@ -393,9 +391,9 @@ namespace llvm { /// unsigned ARMPCLabelIndex; - void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT); - void addDRTypeForNEON(EVT VT); - void addQRTypeForNEON(EVT VT); + void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); + void addDRTypeForNEON(MVT VT); + void addQRTypeForNEON(MVT VT); typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, @@ -544,7 +542,8 @@ namespace llvm { namespace ARM { - FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); } } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1b8fc3f..992aba5 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -242,6 +242,9 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; +def IsLE : Predicate<"TLI.isLittleEndian()">; +def IsBE : Predicate<"TLI.isBigEndian()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -416,8 +419,11 @@ def pclabel : Operand<i32> { } // ADR instruction labels. 
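Since the assembler-side half of this adrlabel change lives much further down, in ARMAsmParser.cpp (isAdrLabel and addAdrLabelOperands), a brief reminder of what the operand stands for: adr forms a PC-relative address, and the operand is either a resolved constant offset or an expression left for a later fixup. A minimal model of the ARM-state computation follows; the helper name is mine, and the Thumb rule (plus 4, with word alignment of the PC) differs:

// "adr Rd, label" in ARM state: the PC reads as the address of the
// instruction plus 8, and the encoded offset is applied to it.
static uint32_t adrResult(uint32_t InstrAddr, int32_t Offset) {
  uint32_t PC = InstrAddr + 8;
  return PC + Offset;
}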
+def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } def adrlabel : Operand<i32> { let EncoderMethod = "getAdrLabelOpValue"; + let ParserMatchClass = AdrLabelAsmOperand; + let PrintMethod = "printAdrLabelOperand"; } def neon_vcvt_imm32 : Operand<i32> { @@ -968,7 +974,7 @@ include "ARMInstrFormats.td" let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1037,7 +1043,7 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_rbin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1285,7 +1291,7 @@ class AI_exta_rrot_np<bits<8> opcod, string opc> /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, - string baseOpc, bit Commutable = 0> { + bit Commutable = 0> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", @@ -1351,8 +1357,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, /// AI1_rsc_irs - Define instructions and patterns for rsc let TwoOperandAliasConstraint = "$Rn = $Rd" in -multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode, - string baseOpc> { +multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", @@ -2816,9 +2821,6 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, let Inst{15-12} = Rd; } -def : ARMInstAlias<"movs${p} $Rd, $Rm", - (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>; - // A version for the smaller set of tail call registers. let neverHasSideEffects = 1 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, @@ -3029,10 +3031,10 @@ def UBFX : I<(outs GPR:$Rd), defm ADD : AsI1_bin_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>; + BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; defm SUB : AsI1_bin_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">; + BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. 
// @@ -3050,15 +3052,13 @@ defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, - "ADC", 1>; + BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, - "SBC">; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; -defm RSB : AsI1_rbin_irs <0b0011, "rsb", - IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">; +defm RSB : AsI1_rbin_irs<0b0011, "rsb", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(sub node:$LHS, node:$RHS)>>; // FIXME: Eliminate them if we can write def : Pat patterns which defines // CPSR and the implicit def of CPSR is not needed. @@ -3066,8 +3066,7 @@ defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr, BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; defm RSC : AI1_rsc_irs<0b0111, "rsc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, - "RSC">; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub @@ -3276,16 +3275,16 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos), defm AND : AsI1_bin_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; defm ORR : AsI1_bin_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm EOR : AsI1_bin_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; defm BIC : AsI1_bin_irs<0b1110, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">; + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; // FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just // like in the actual instruction encoding. The complexity of mapping the mask @@ -3940,7 +3939,7 @@ def BCCZi64 : PseudoInst<(outs), // a two-value operand where a dag node expects two operands. 
:( let neverHasSideEffects = 1 in { -let isCommutable = 1 in +let isCommutable = 1, isSelect = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, @@ -3993,25 +3992,29 @@ multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { def ri : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s), + (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm, + pred:$p, cc_out:$s), 4, iii, [], (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; def rr : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm, + pred:$p, cc_out:$s), 4, iir, [], (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; def rsi : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s), + (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift, + pred:$p, cc_out:$s), 4, iis, [], (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift, + pred:$p, cc_out:$s), 4, iis, [], (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; } defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, @@ -4020,6 +4023,10 @@ defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, IIC_iBITi, IIC_iBITr, IIC_iBITsr>; defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; } // neverHasSideEffects @@ -4068,11 +4075,8 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, // Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS -let usesCustomInserter = 1, Defs = [CPSR] in { -def ABS : ARMPseudoInst< - (outs GPR:$dst), (ins GPR:$src), - 8, NoItinerary, []>; -} +let usesCustomInserter = 1, Defs = [CPSR] in +def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; let usesCustomInserter = 1 in { let Defs = [CPSR] in { diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3134088..048d340 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -398,6 +398,27 @@ def VecListFourQWordIndexed : Operand<i32> { let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() == 2; +}]>; +def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() == 2; +}]>; +def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() == 1; +}]>; +def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() == 1; +}]>; +def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + 
return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -2238,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +// Use vld1/vst1 for unaligned f64 load / store +def : Pat<(f64 (hword_alignedload addrmode6:$addr)), + (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (byte_alignedload addrmode6:$addr)), + (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), + (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; +def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; //===----------------------------------------------------------------------===// // NEON pattern fragments diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index d83530a..8ecf009 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -172,6 +172,7 @@ def t2ldr_pcrel_imm12 : Operand<i32> { // ADR instruction labels. def t2adrlabel : Operand<i32> { let EncoderMethod = "getT2AdrLabelOpValue"; + let PrintMethod = "printAdrLabelOperand"; } @@ -529,7 +530,7 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, /// changed to modify CPSR. multiclass T2I_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0, + PatFrag opnode, bit Commutable = 0, string wide = ""> { // shifted imm def ri : T2sTwoRegImm< @@ -565,15 +566,15 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, // Assembly aliases for optional destination operand when it's the same // as the source operand. def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, + (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, pred:$p, cc_out:$s)>; } @@ -582,36 +583,30 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, // the ".w" suffix to indicate that they are wide. multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> : - T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> { + PatFrag opnode, bit Commutable = 0> : + T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> { // Assembler aliases w/ the ".w" suffix. 
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, - t2_so_imm:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, + cc_out:$s)>; // Assembler aliases w/o the ".w" suffix. def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn, - t2_so_reg:$shift, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift, + pred:$p, cc_out:$s)>; // and with the optional destination operand, too. def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - t2_so_imm:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, + pred:$p, cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, - t2_so_reg:$shift, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, + pred:$p, cc_out:$s)>; } /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are @@ -762,6 +757,33 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{24} = 1; let Inst{23-21} = op23_21; } + + // Predicated versions. + def CCri : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm, + pred:$p, cc_out:$s), 4, IIC_iALUi, [], + (!cast<Instruction>(NAME#ri) GPRnopc:$Rd, + GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rfalse = $Rd">; + def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm, + pred:$p), + 4, IIC_iALUi, [], + (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd, + GPR:$Rn, imm0_4095:$imm, pred:$p)>, + RegConstraint<"$Rfalse = $Rd">; + def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm, + pred:$p, cc_out:$s), 4, IIC_iALUr, [], + (!cast<Instruction>(NAME#rr) GPRnopc:$Rd, + GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rfalse = $Rd">; + def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm, + pred:$p, cc_out:$s), 4, IIC_iALUsi, [], + (!cast<Instruction>(NAME#rs) GPRnopc:$Rd, + GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rfalse = $Rd">; } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns @@ -808,8 +830,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift / // rotate operation that produces a value. 
-multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode, - string baseOpc> { +multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> { // 5-bit imm def ri : T2sTwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi, @@ -834,33 +855,27 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode, // Optional destination register def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - ty:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; // Assembler aliases w/o the ".w" suffix. def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, - ty:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, + cc_out:$s)>; // and with the optional destination operand, too. def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - ty:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; } /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test @@ -868,7 +883,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode, /// a explicit result, only implicitly set CPSR. multiclass T2I_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc> { + PatFrag opnode> { let isCompare = 1, Defs = [CPSR] in { // shifted imm def ri : T2OneRegCmpImm< @@ -913,12 +928,9 @@ let isCompare = 1, Defs = [CPSR] in { // No alias here for 'rr' version as not all instantiations of this // multiclass want one (CMP in particular, does not). def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn, - t2_so_imm:$imm, pred:$p)>; + (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>; def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn, - t2_so_reg:$shift, - pred:$p)>; + (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>; } /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. 
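Stepping back from the multiclass plumbing: the conditional pseudos this patch adds on both the ARM and Thumb2 sides (AsI1_bincc_irs, T2I_bincc_irs, and the CCri/CCri12/CCrr/CCrs variants) all tie a $Rfalse input to $Rd, so the register allocator knows which value survives when the predicate fails. Their semantics after expansion, as a minimal C++ model with names of my own choosing:

// Model of an ADDCCri-style pseudo once expanded to a predicated add.
// The tied operand means a false predicate leaves Rd equal to Rfalse.
static uint32_t addccri(bool Pred, uint32_t Rfalse, uint32_t Rn,
                        uint32_t Imm) {
  uint32_t Rd = Rfalse;   // incoming value of the tied register
  if (Pred)
    Rd = Rn + Imm;        // e.g. "addeq Rd, Rn, #Imm" executes
  return Rd;
}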
@@ -2152,13 +2164,13 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; // defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, - BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">; + BinOpFrag<(shl node:$LHS, node:$RHS)>>; defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, - BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">; + BinOpFrag<(srl node:$LHS, node:$RHS)>>; defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, - BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">; + BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, - BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">; + BinOpFrag<(rotr node:$LHS, node:$RHS)>>; // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), @@ -2214,18 +2226,17 @@ def t2MOVsra_flag : T2TwoRegShiftImm< defm t2AND : T2I_bin_w_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; defm t2ORR : T2I_bin_w_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm t2EOR : T2I_bin_w_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; defm t2BIC : T2I_bin_w_irs<0b0001, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, (not node:$RHS))>, - "t2BIC">; + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; class T2BitFI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -2305,8 +2316,7 @@ let Constraints = "$src = $Rd" in { defm t2ORN : T2I_bin_irs<0b0011, "orn", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, (not node:$RHS))>, - "t2ORN", 0, "">; + BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">; /// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// unary operation that produces a value. These are predicable and can be @@ -2878,7 +2888,7 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">; + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm), (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>; @@ -2932,13 +2942,10 @@ let isCompare = 1, Defs = [CPSR] in { // Assembler aliases w/o the ".w" suffix. // No alias here for 'rr' version as not all instantiations of this multiclass // want one (CMP in particular, does not). 
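One line of reasoning behind the CMN patterns in the next hunk deserves spelling out: cmn Rn, #c computes Rn + c, which is exactly what cmp Rn, #-c computes, so a compare against a negated so_imm can be encoded as cmn with the positive immediate. A trivial standalone check, for illustration only:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t Rn = -4; Rn <= 4; ++Rn)
    for (int32_t C = 1; C <= 4; ++C)
      // cmp Rn, #-C and cmn Rn, #C perform the same addition, so the
      // NZCV flags come out identical.
      assert(Rn - (-C) == Rn + C);
  return 0;
}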
-def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $imm"), - (!cast<Instruction>(!strconcat("t2CMN", "ri")) GPRnopc:$Rn, - t2_so_imm:$imm, pred:$p)>; -def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $shift"), - (!cast<Instruction>(!strconcat("t2CMNz", "rs")) GPRnopc:$Rn, - t2_so_reg:$shift, - pred:$p)>; +def : t2InstAlias<"cmn${p} $Rn, $imm", + (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>; +def : t2InstAlias<"cmn${p} $Rn, $shift", + (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>; def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; @@ -2948,19 +2955,17 @@ def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm), defm t2TST : T2I_cmp_irs<0b0000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, - "t2TST">; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>; defm t2TEQ : T2I_cmp_irs<0b0100, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, - "t2TEQ">; + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { -let isCommutable = 1 in +let isCommutable = 1, isSelect = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, pred:$p), 4, IIC_iCMOVr, @@ -3048,22 +3053,25 @@ multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { // shifted imm def ri : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s), + (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm, + pred:$p, cc_out:$s), 4, iii, [], (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; // register def rr : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s), + (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm, + pred:$p, cc_out:$s), 4, iir, [], (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; // shifted register def rs : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s), + (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s), 4, iis, [], (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; } // T2I_bincc_irs defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 23c132e..7d6692f 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -61,6 +61,15 @@ def vfp_f64imm : Operand<f64>, let ParserMatchClass = FPImmOperand; } +def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; + +def alignedstore32 : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + // The VCVT to/from fixed-point instructions encode the 'fbits' operand // (the number of fixed bits) differently than it appears in the assembly // source. 
It's encoded as "Size - fbits" where Size is the size of the @@ -86,7 +95,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", "\t$Dd, $addr", - [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>; + [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>; def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), IIC_fpLoad32, "vldr", "\t$Sd, $addr", @@ -100,7 +109,7 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), IIC_fpStore64, "vstr", "\t$Dd, $addr", - [(store (f64 DPR:$Dd), addrmode5:$addr)]>; + [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>; def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), IIC_fpStore32, "vstr", "\t$Sd, $addr", @@ -433,25 +442,25 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. -def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : ARMPat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; - -def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : ARMPat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; +def : Pat<(f32_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + +def : Pat<(f16_to_f32 GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index c5db211..357fc3f 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -291,9 +291,9 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) ResultPtr = ResultPtr >> 2; *((intptr_t*)RelocPos) |= ResultPtr; - // Set register Rn to PC. - *((intptr_t*)RelocPos) |= - getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + // Set register Rn to PC (which is register 15 on all architectures). + // FIXME: This avoids the need for register info in the JIT class. 
+ *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift; break; } case ARM::reloc_arm_so_imm_cp_entry: { diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index cb1b2a2..897ceb6 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -456,8 +456,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, DebugLoc dl = Loc->getDebugLoc(); const MachineOperand &PMO = Loc->getOperand(0); unsigned PReg = PMO.getReg(); - unsigned PRegNum = PMO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(PReg); + unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; @@ -483,8 +482,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, int NewOffset = MemOps[i].Offset; const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); unsigned Reg = MO.getReg(); - unsigned RegNum = MO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(Reg); + unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); // Register numbers must be in ascending order. For VFP / NEON load and // store multiples, the registers must also be consecutive and within the // limit on the number of registers per instruction. diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 3857647..6f974fd 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// // Registers are identified with 4-bit ID numbers. -class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> { - field bits<4> Num; +class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> { + let HWEncoding = Enc; let Namespace = "ARM"; let SubRegs = subregs; // All bits of ARM registers with sub-registers are covered by sub-registers. let CoveredBySubRegs = 1; } -class ARMFReg<bits<6> num, string n> : Register<n> { - field bits<6> Num; +class ARMFReg<bits<16> Enc, string n> : Register<n> { + let HWEncoding = Enc; let Namespace = "ARM"; } diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 56197d4..2c63825 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -1069,6 +1069,7 @@ def CortexA8Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. + let MispredictPenalty = 13; // Based on estimate of pipeline depth. let Itineraries = CortexA8Itineraries; } diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 738974e..7bc590f 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1886,6 +1886,7 @@ def CortexA9Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. + let MispredictPenalty = 8; // Based on estimate of pipeline depth. 
let Itineraries = CortexA9Itineraries; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e067a9f..89e29ad 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -97,6 +97,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, if (!HasV6T2Ops && hasThumb2()) HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true; + // Keep a pointer to static instruction cost data for the specified CPU. + SchedModel = getSchedModelForCPU(CPUString); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); @@ -179,15 +182,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, } unsigned ARMSubtarget::getMispredictionPenalty() const { - // If we have a reasonable estimate of the pipeline depth, then we can - // estimate the penalty of a misprediction based on that. - if (isCortexA8()) - return 13; - else if (isCortexA9()) - return 8; - - // Otherwise, just return a sensible default. - return 10; + return SchedModel->MispredictPenalty; } bool ARMSubtarget::enablePostRAScheduler( diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index e72b06f..b394061 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -74,7 +74,7 @@ protected: /// HasThumb2 - True if Thumb2 instructions are supported. bool HasThumb2; - /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs - + /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs - /// v6m, v7m for example. bool IsMClass; @@ -155,6 +155,9 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; + /// SchedModel - Processor specific instruction costs. + const MCSchedModel *SchedModel; + /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 4497720..3a5957b 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -796,6 +796,13 @@ public: int64_t Value = CE->getValue(); return Value > 0 && Value <= 32; } + bool isAdrLabel() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, but it can't fit + // into shift immediate encoding, we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) return true; + else return (isARMSOImm() || isARMSOImmNeg()); + } bool isARMSOImm() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1033,7 +1040,8 @@ public: // Immediate offset a multiple of 4 in range [-1020, 1020]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - return Val >= -1020 && Val <= 1020 && (Val & 3) == 0; + // Special case, #-0 is INT32_MIN. + return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN; } bool isMemImm0_1020s4Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -1644,6 +1652,22 @@ public: Inst.addOperand(MCOperand::CreateImm(Imm)); } + void addAdrLabelOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + assert(isImm() && "Not an immediate!"); + + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. 
+ if (!isa<MCConstantExpr>(getImm())) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + return; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + int Val = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); @@ -2884,7 +2908,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!RC->contains(EndReg)) return Error(EndLoc, "invalid register in register list"); // Ranges must go from low to high. - if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg)) + if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg)) return Error(EndLoc, "bad range in register list"); // Add all the registers in the range to the register list. @@ -2911,13 +2935,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); // List must be monotonically increasing. - if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) { + if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) { if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) Warning(RegLoc, "register list not in ascending order"); else return Error(RegLoc, "register list not in ascending order"); } - if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) { + if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) { Warning(RegLoc, "duplicated register (" + RegTok.getString() + ") in register list"); continue; @@ -3256,29 +3280,59 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - if (!Tok.is(AsmToken::Identifier)) - return MatchOperand_NoMatch; - StringRef OptStr = Tok.getString(); - - unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower()) - .Case("sy", ARM_MB::SY) - .Case("st", ARM_MB::ST) - .Case("sh", ARM_MB::ISH) - .Case("ish", ARM_MB::ISH) - .Case("shst", ARM_MB::ISHST) - .Case("ishst", ARM_MB::ISHST) - .Case("nsh", ARM_MB::NSH) - .Case("un", ARM_MB::NSH) - .Case("nshst", ARM_MB::NSHST) - .Case("unst", ARM_MB::NSHST) - .Case("osh", ARM_MB::OSH) - .Case("oshst", ARM_MB::OSHST) - .Default(~0U); + unsigned Opt; + + if (Tok.is(AsmToken::Identifier)) { + StringRef OptStr = Tok.getString(); + + Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower()) + .Case("sy", ARM_MB::SY) + .Case("st", ARM_MB::ST) + .Case("sh", ARM_MB::ISH) + .Case("ish", ARM_MB::ISH) + .Case("shst", ARM_MB::ISHST) + .Case("ishst", ARM_MB::ISHST) + .Case("nsh", ARM_MB::NSH) + .Case("un", ARM_MB::NSH) + .Case("nshst", ARM_MB::NSHST) + .Case("unst", ARM_MB::NSHST) + .Case("osh", ARM_MB::OSH) + .Case("oshst", ARM_MB::OSHST) + .Default(~0U); - if (Opt == ~0U) - return MatchOperand_NoMatch; + if (Opt == ~0U) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. + } else if (Tok.is(AsmToken::Hash) || + Tok.is(AsmToken::Dollar) || + Tok.is(AsmToken::Integer)) { + if (Parser.getTok().isNot(AsmToken::Integer)) + Parser.Lex(); // Eat the '#'. 
+ SMLoc Loc = Parser.getTok().getLoc(); + + const MCExpr *MemBarrierID; + if (getParser().ParseExpression(MemBarrierID)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID); + if (!CE) { + Error(Loc, "constant expression expected"); + return MatchOperand_ParseFail; + } + + int Val = CE->getValue(); + if (Val & ~0xf) { + Error(Loc, "immediate value out of range"); + return MatchOperand_ParseFail; + } + + Opt = ARM_MB::RESERVED_0 + Val; + } else + return MatchOperand_ParseFail; - Parser.Lex(); // Eat identifier token. Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S)); return MatchOperand_Success; } @@ -5250,8 +5304,8 @@ validateInstruction(MCInst &Inst, case ARM::LDRD_POST: case ARM::LDREXD: { // Rt2 must be Rt + 1. - unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg()); - unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "destination operands must be sequential"); @@ -5259,8 +5313,8 @@ validateInstruction(MCInst &Inst, } case ARM::STRD: { // Rt2 must be Rt + 1. - unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg()); - unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "source operands must be sequential"); @@ -5270,8 +5324,8 @@ validateInstruction(MCInst &Inst, case ARM::STRD_POST: case ARM::STREXD: { // Rt2 must be Rt + 1. - unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg()); - unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg()); + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "source operands must be sequential"); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 47cca2a..c90751d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -18,10 +18,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <vector> @@ -383,7 +385,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" -#include "ARMGenInstrInfo.inc" #include "ARMGenEDInfo.inc" static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { @@ -427,7 +428,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, (bytes[0] << 0); // Calling the auto-generated decoder function. 
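The comment above survives, but the callee changes throughout this file: each width- and class-specific entry point (decodeARMInstruction32, decodeVFPInstruction32, and so on) is replaced by the single decodeInstruction from MCFixedLenDisassembler, parameterized by the table to search. The retry structure the hunks below spell out by hand looks roughly like this; tryTables is my name, and the DecoderTable* arrays and decodeInstruction come from the generated ARMGenDisassemblerTables.inc:

// Try a sequence of generated decoder tables until one matches.
static DecodeStatus tryTables(MCInst &MI, uint32_t Insn, uint64_t Address,
                              const void *Decoder,
                              const MCSubtargetInfo &STI) {
  static const uint8_t *const Tables[] = {
    DecoderTableARM32, DecoderTableVFP32, DecoderTableNEONData32
  };
  for (unsigned i = 0; i != 3; ++i) {
    MI.clear();
    DecodeStatus Result =
        decodeInstruction(Tables[i], MI, Insn, Address, Decoder, STI);
    if (Result != MCDisassembler::Fail)
      return Result;
  }
  return MCDisassembler::Fail;
}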
- DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI); + DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; return result; @@ -436,14 +438,15 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // VFP and NEON instructions, similarly, are shared between ARM // and Thumb modes. MI.clear(); - result = decodeVFPInstruction32(MI, insn, Address, this, STI); + result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; return result; } MI.clear(); - result = decodeNEONDataInstruction32(MI, insn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction @@ -454,7 +457,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction @@ -465,7 +469,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeNEONDupInstruction32(MI, insn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction @@ -765,7 +770,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } uint16_t insn16 = (bytes[1] << 8) | bytes[0]; - DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI); + DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 2; Check(result, AddThumbPredicate(MI)); @@ -773,7 +779,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI); + result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16, + Address, this, STI); if (result) { Size = 2; bool InITBlock = ITBlock.instrInITBlock(); @@ -783,7 +790,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeThumb2Instruction16(MI, insn16, Address, this, STI); + result = decodeInstruction(DecoderTableThumb216, MI, insn16, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 2; @@ -818,7 +826,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, (bytes[1] << 24) | (bytes[0] << 16); MI.clear(); - result = decodeThumbInstruction32(MI, insn32, Address, this, STI); + result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; bool InITBlock = ITBlock.instrInITBlock(); @@ -828,7 +837,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeThumb2Instruction32(MI, insn32, Address, this, STI); + result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; Check(result, AddThumbPredicate(MI)); @@ -836,7 +846,7 @@ DecodeStatus 
ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeVFPInstruction32(MI, insn32, Address, this, STI); + result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; UpdateThumbVFPPredicate(MI); @@ -844,19 +854,21 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI); + result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; Check(result, AddThumbPredicate(MI)); return result; } - if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) { + if (fieldFromInstruction(insn32, 24, 8) == 0xF9) { MI.clear(); uint32_t NEONLdStInsn = insn32; NEONLdStInsn &= 0xF0FFFFFF; NEONLdStInsn |= 0x04000000; - result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; Check(result, AddThumbPredicate(MI)); @@ -864,13 +876,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - if (fieldFromInstruction32(insn32, 24, 4) == 0xF) { + if (fieldFromInstruction(insn32, 24, 4) == 0xF) { MI.clear(); uint32_t NEONDataInsn = insn32; NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 NEONDataInsn |= 0x12000000; // Set bits 28 and 25 - result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; Check(result, AddThumbPredicate(MI)); @@ -1117,9 +1130,9 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rm = fieldFromInstruction32(Val, 0, 4); - unsigned type = fieldFromInstruction32(Val, 5, 2); - unsigned imm = fieldFromInstruction32(Val, 7, 5); + unsigned Rm = fieldFromInstruction(Val, 0, 4); + unsigned type = fieldFromInstruction(Val, 5, 2); + unsigned imm = fieldFromInstruction(Val, 7, 5); // Register-immediate if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) @@ -1154,9 +1167,9 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rm = fieldFromInstruction32(Val, 0, 4); - unsigned type = fieldFromInstruction32(Val, 5, 2); - unsigned Rs = fieldFromInstruction32(Val, 8, 4); + unsigned Rm = fieldFromInstruction(Val, 0, 4); + unsigned type = fieldFromInstruction(Val, 5, 2); + unsigned Rs = fieldFromInstruction(Val, 8, 4); // Register-register if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) @@ -1224,8 +1237,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Vd = fieldFromInstruction32(Val, 8, 5); - unsigned regs = fieldFromInstruction32(Val, 0, 8); + unsigned Vd = fieldFromInstruction(Val, 8, 5); + unsigned regs = fieldFromInstruction(Val, 0, 8); if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; @@ -1241,8 +1254,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t 
@@ -1241,8 +1254,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
                                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Vd = fieldFromInstruction32(Val, 8, 5);
-  unsigned regs = fieldFromInstruction32(Val, 0, 8);
+  unsigned Vd = fieldFromInstruction(Val, 8, 5);
+  unsigned regs = fieldFromInstruction(Val, 0, 8);
   regs = regs >> 1;
@@ -1263,8 +1276,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
   // the mask of all bits LSB-and-lower, and then xor them to create
   // the mask that is all ones on [msb, lsb]. Finally we invert it to
   // create the final mask.
-  unsigned msb = fieldFromInstruction32(Val, 5, 5);
-  unsigned lsb = fieldFromInstruction32(Val, 0, 5);
+  unsigned msb = fieldFromInstruction(Val, 5, 5);
+  unsigned lsb = fieldFromInstruction(Val, 0, 5);
   DecodeStatus S = MCDisassembler::Success;
   if (lsb > msb) Check(S, MCDisassembler::SoftFail);
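For the bitfield mask described by the comment in DecodeBitfieldMaskOperand above, a standalone restatement with one worked case. The function body is not shown in this hunk, so this is an assumed reconstruction from the comment, not the patched source:

#include <cstdint>

// Assumed reconstruction: BFC/BFI mask with zeros exactly on [msb, lsb].
uint32_t bfiMask(unsigned msb, unsigned lsb) {
  uint32_t msb_mask = (msb == 31) ? 0xFFFFFFFFu : ((1u << (msb + 1)) - 1);
  uint32_t lsb_mask = (1u << lsb) - 1;
  // xor gives ones exactly on [msb, lsb]; inverting yields the final mask.
  return ~(msb_mask ^ lsb_mask);
}
// bfiMask(7, 4) == ~(0xFF ^ 0x0F) == ~0xF0 == 0xFFFFFF0F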
@@ -1281,12 +1294,12 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
                                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned CRd = fieldFromInstruction32(Insn, 12, 4);
-  unsigned coproc = fieldFromInstruction32(Insn, 8, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 8);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned U = fieldFromInstruction32(Insn, 23, 1);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned CRd = fieldFromInstruction(Insn, 12, 4);
+  unsigned coproc = fieldFromInstruction(Insn, 8, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 8);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned U = fieldFromInstruction(Insn, 23, 1);
   switch (Inst.getOpcode()) {
   case ARM::LDC_OFFSET:
@@ -1426,14 +1439,14 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
                               uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned reg = fieldFromInstruction32(Insn, 25, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned reg = fieldFromInstruction(Insn, 25, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);
   // On stores, the writeback operand precedes Rt.
   switch (Inst.getOpcode()) {
@@ -1476,7 +1489,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
     return MCDisassembler::Fail;
   ARM_AM::AddrOpc Op = ARM_AM::add;
-  if (!fieldFromInstruction32(Insn, 23, 1))
+  if (!fieldFromInstruction(Insn, 23, 1))
     Op = ARM_AM::sub;
   bool writeback = (P == 0) || (W == 1);
@@ -1493,7 +1506,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
     if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
       return MCDisassembler::Fail;
     ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
-    switch( fieldFromInstruction32(Insn, 5, 2)) {
+    switch( fieldFromInstruction(Insn, 5, 2)) {
     case 0:
       Opc = ARM_AM::lsl;
       break;
@@ -1509,7 +1522,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
    default:
      return MCDisassembler::Fail;
    }
-   unsigned amt = fieldFromInstruction32(Insn, 7, 5);
+   unsigned amt = fieldFromInstruction(Insn, 7, 5);
    unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
    Inst.addOperand(MCOperand::CreateImm(imm));
@@ -1529,11 +1542,11 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
                                           uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 13, 4);
-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  unsigned type = fieldFromInstruction32(Val, 5, 2);
-  unsigned imm = fieldFromInstruction32(Val, 7, 5);
-  unsigned U = fieldFromInstruction32(Val, 12, 1);
+  unsigned Rn = fieldFromInstruction(Val, 13, 4);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  unsigned type = fieldFromInstruction(Val, 5, 2);
+  unsigned imm = fieldFromInstruction(Val, 7, 5);
+  unsigned U = fieldFromInstruction(Val, 12, 1);
   ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
   switch (type) {
@@ -1570,15 +1583,15 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned type = fieldFromInstruction32(Insn, 22, 1);
-  unsigned imm = fieldFromInstruction32(Insn, 8, 4);
-  unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned type = fieldFromInstruction(Insn, 22, 1);
+  unsigned imm = fieldFromInstruction(Insn, 8, 4);
+  unsigned U = ((~fieldFromInstruction(Insn, 23, 1)) & 1) << 8;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
   unsigned Rt2 = Rt + 1;
   bool writeback = (W == 1) | (P == 0);
@@ -1609,7 +1622,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
       S = MCDisassembler::SoftFail;
     if (Rt2 == 15)
       S = MCDisassembler::SoftFail;
-    if (!type && fieldFromInstruction32(Insn, 8, 4))
+    if (!type && fieldFromInstruction(Insn, 8, 4))
       S = MCDisassembler::SoftFail;
     break;
   case ARM::STRH:
@@ -1761,8 +1774,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned mode = fieldFromInstruction32(Insn, 23, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned mode = fieldFromInstruction(Insn, 23, 2);
   switch (mode) {
   case 0:
@@ -1791,9 +1804,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
                                                           uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned reglist = fieldFromInstruction32(Insn, 0, 16);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned reglist = fieldFromInstruction(Insn, 0, 16);
   if (pred == 0xF) {
     switch (Inst.getOpcode()) {
@@ -1850,9 +1863,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
     }
     // For stores (which become SRS's), the only operand is the mode.
-    if (fieldFromInstruction32(Insn, 20, 1) == 0) {
+    if (fieldFromInstruction(Insn, 20, 1) == 0) {
       Inst.addOperand(
-          MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4)));
+          MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4)));
       return S;
     }
@@ -1873,10 +1886,10 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
 static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder) {
-  unsigned imod = fieldFromInstruction32(Insn, 18, 2);
-  unsigned M = fieldFromInstruction32(Insn, 17, 1);
-  unsigned iflags = fieldFromInstruction32(Insn, 6, 3);
-  unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+  unsigned imod = fieldFromInstruction(Insn, 18, 2);
+  unsigned M = fieldFromInstruction(Insn, 17, 1);
+  unsigned iflags = fieldFromInstruction(Insn, 6, 3);
+  unsigned mode = fieldFromInstruction(Insn, 0, 5);
   DecodeStatus S = MCDisassembler::Success;
@@ -1913,10 +1926,10 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
                                            uint64_t Address, const void *Decoder) {
-  unsigned imod = fieldFromInstruction32(Insn, 9, 2);
-  unsigned M = fieldFromInstruction32(Insn, 8, 1);
-  unsigned iflags = fieldFromInstruction32(Insn, 5, 3);
-  unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+  unsigned imod = fieldFromInstruction(Insn, 9, 2);
+  unsigned M = fieldFromInstruction(Insn, 8, 1);
+  unsigned iflags = fieldFromInstruction(Insn, 5, 3);
+  unsigned mode = fieldFromInstruction(Insn, 0, 5);
   DecodeStatus S = MCDisassembler::Success;
@@ -1955,13 +1968,13 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
                                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 8, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 8, 4);
   unsigned imm = 0;
-  imm |= (fieldFromInstruction32(Insn, 0, 8) << 0);
-  imm |= (fieldFromInstruction32(Insn, 12, 3) << 8);
-  imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
-  imm |= (fieldFromInstruction32(Insn, 26, 1) << 11);
+  imm |= (fieldFromInstruction(Insn, 0, 8) << 0);
+  imm |= (fieldFromInstruction(Insn, 12, 3) << 8);
+  imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
+  imm |= (fieldFromInstruction(Insn, 26, 1) << 11);
   if (Inst.getOpcode() == ARM::t2MOVTi16)
     if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -1979,12 +1992,12 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
                                               uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   unsigned imm = 0;
-  imm |= (fieldFromInstruction32(Insn, 0, 12) << 0);
-  imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+  imm |= (fieldFromInstruction(Insn, 0, 12) << 0);
+  imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
   if (Inst.getOpcode() == ARM::MOVTi16)
     if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -2005,11 +2018,11 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
                                           uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 8, 4);
-  unsigned Ra = fieldFromInstruction32(Insn, 12, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 8, 4);
+  unsigned Ra = fieldFromInstruction(Insn, 12, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   if (pred == 0xF)
     return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
@@ -2033,9 +2046,9 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
                                                uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned add = fieldFromInstruction32(Val, 12, 1);
-  unsigned imm = fieldFromInstruction32(Val, 0, 12);
-  unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+  unsigned add = fieldFromInstruction(Val, 12, 1);
+  unsigned imm = fieldFromInstruction(Val, 0, 12);
+  unsigned Rn = fieldFromInstruction(Val, 13, 4);
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2053,9 +2066,9 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
                                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 9, 4);
-  unsigned U = fieldFromInstruction32(Val, 8, 1);
-  unsigned imm = fieldFromInstruction32(Val, 0, 8);
+  unsigned Rn = fieldFromInstruction(Val, 9, 4);
+  unsigned U = fieldFromInstruction(Val, 8, 1);
+  unsigned imm = fieldFromInstruction(Val, 0, 8);
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2077,11 +2090,11 @@ static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) |
-                 (fieldFromInstruction32(Insn, 11, 1) << 18) |
-                 (fieldFromInstruction32(Insn, 13, 1) << 17) |
-                 (fieldFromInstruction32(Insn, 16, 6) << 11) |
-                 (fieldFromInstruction32(Insn, 26, 1) << 19);
+  unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) |
+                 (fieldFromInstruction(Insn, 11, 1) << 18) |
+                 (fieldFromInstruction(Insn, 13, 1) << 17) |
+                 (fieldFromInstruction(Insn, 16, 6) << 11) |
+                 (fieldFromInstruction(Insn, 26, 1) << 19);
   if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
                                 true, 4, Inst, Decoder))
     Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
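The two MOVT/MOVW decoders above reassemble a 16-bit immediate from scattered encoding fields. A worked example for the Thumb2 form, using the field positions from DecodeT2MOVTWInstruction:

// Reassembles imm16 from the Thumb2 MOVW/MOVT fields:
//   imm8 = Insn[7:0], imm3 = Insn[14:12], i = Insn[26], imm4 = Insn[19:16]
unsigned decodeT2Imm16(unsigned imm8, unsigned imm3, unsigned i,
                       unsigned imm4) {
  unsigned imm = 0;
  imm |= imm8 << 0;   // low byte
  imm |= imm3 << 8;   // bits 8-10
  imm |= imm4 << 12;  // top nibble
  imm |= i << 11;     // bit 11
  return imm;
}
// decodeT2Imm16(0xCD, 0x3, 0x1, 0xA) == 0xABCD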
@@ -2093,12 +2106,12 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 24) << 2;
   if (pred == 0xF) {
     Inst.setOpcode(ARM::BLXi);
-    imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+    imm |= fieldFromInstruction(Insn, 24, 1) << 1;
     if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
                                   true, 4, Inst, Decoder))
       Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
@@ -2119,8 +2132,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
                                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  unsigned align = fieldFromInstruction32(Val, 4, 2);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  unsigned align = fieldFromInstruction(Val, 4, 2);
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2136,12 +2149,12 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned wb = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned wb = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
   // First output register
   switch (Inst.getOpcode()) {
@@ -2410,12 +2423,12 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned wb = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned wb = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
   // Writeback Operand
   switch (Inst.getOpcode()) {
@@ -2681,12 +2694,12 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
                                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned align = fieldFromInstruction32(Insn, 4, 1);
-  unsigned size = fieldFromInstruction32(Insn, 6, 2);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned align = fieldFromInstruction(Insn, 4, 1);
+  unsigned size = fieldFromInstruction(Insn, 6, 2);
   align *= (1 << size);
@@ -2726,12 +2739,12 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
                                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned align = fieldFromInstruction32(Insn, 4, 1);
-  unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned align = fieldFromInstruction(Insn, 4, 1);
+  unsigned size = 1 << fieldFromInstruction(Insn, 6, 2);
   align *= 2*size;
   switch (Inst.getOpcode()) {
@@ -2774,11 +2787,11 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
                                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
   if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2809,13 +2822,13 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
                                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned size = fieldFromInstruction32(Insn, 6, 2);
-  unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
-  unsigned align = fieldFromInstruction32(Insn, 4, 1);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned size = fieldFromInstruction(Insn, 6, 2);
+  unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
+  unsigned align = fieldFromInstruction(Insn, 4, 1);
   if (size == 0x3) {
     size = 4;
@@ -2862,14 +2875,14 @@ DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned imm = fieldFromInstruction32(Insn, 0, 4);
-  imm |= fieldFromInstruction32(Insn, 16, 3) << 4;
-  imm |= fieldFromInstruction32(Insn, 24, 1) << 7;
-  imm |= fieldFromInstruction32(Insn, 8, 4) << 8;
-  imm |= fieldFromInstruction32(Insn, 5, 1) << 12;
-  unsigned Q = fieldFromInstruction32(Insn, 6, 1);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned imm = fieldFromInstruction(Insn, 0, 4);
+  imm |= fieldFromInstruction(Insn, 16, 3) << 4;
+  imm |= fieldFromInstruction(Insn, 24, 1) << 7;
+  imm |= fieldFromInstruction(Insn, 8, 4) << 8;
+  imm |= fieldFromInstruction(Insn, 5, 1) << 12;
+  unsigned Q = fieldFromInstruction(Insn, 6, 1);
   if (Q) {
     if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
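DecodeNEONModImmInstruction above packs a 13-bit {op, cmode, imm8} value from five scattered fields. A sketch of the layout the shifts produce (bit positions taken directly from the ORs above):

// Layout of imm after DecodeNEONModImmInstruction's field packing:
//   bits 0-3  : Insn[3:0]   (low half of the abcdefgh byte)
//   bits 4-6  : Insn[18:16] (middle of the abcdefgh byte)
//   bit  7    : Insn[24]    (top bit of the abcdefgh byte)
//   bits 8-11 : Insn[11:8]  (cmode, selects the splat/shift pattern)
//   bit  12   : Insn[5]     (op)
unsigned packNEONModImm(unsigned abcdefgh, unsigned cmode, unsigned op) {
  return (abcdefgh & 0xFF) | (cmode << 8) | (op << 12);
}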
@@ -2907,11 +2920,11 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
                                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 18, 2);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 18, 2);
   if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2950,13 +2963,13 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  Rn |= fieldFromInstruction32(Insn, 7, 1) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
-  unsigned op = fieldFromInstruction32(Insn, 6, 1);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  Rn |= fieldFromInstruction(Insn, 7, 1) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+  unsigned op = fieldFromInstruction(Insn, 6, 1);
   if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2986,8 +2999,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
                                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned dst = fieldFromInstruction16(Insn, 8, 3);
-  unsigned imm = fieldFromInstruction16(Insn, 0, 8);
+  unsigned dst = fieldFromInstruction(Insn, 8, 3);
+  unsigned imm = fieldFromInstruction(Insn, 0, 8);
   if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3034,8 +3047,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
                                           uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 0, 3);
-  unsigned Rm = fieldFromInstruction32(Val, 3, 3);
+  unsigned Rn = fieldFromInstruction(Val, 0, 3);
+  unsigned Rm = fieldFromInstruction(Val, 3, 3);
   if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3049,8 +3062,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
                                           uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 0, 3);
-  unsigned imm = fieldFromInstruction32(Val, 3, 5);
+  unsigned Rn = fieldFromInstruction(Val, 0, 3);
+  unsigned imm = fieldFromInstruction(Val, 3, 5);
   if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3081,9 +3094,9 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
                                           uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 6, 4);
-  unsigned Rm = fieldFromInstruction32(Val, 2, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 2);
+  unsigned Rn = fieldFromInstruction(Val, 6, 4);
+  unsigned Rm = fieldFromInstruction(Val, 2, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 2);
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3104,13 +3117,13 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
   case ARM::t2PLIs:
     break;
   default: {
-    unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+    unsigned Rt = fieldFromInstruction(Insn, 12, 4);
     if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
       return MCDisassembler::Fail;
   }
   }
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
   if (Rn == 0xF) {
     switch (Inst.getOpcode()) {
     case ARM::t2LDRBs:
@@ -3133,16 +3146,16 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
       return MCDisassembler::Fail;
     }
-    int imm = fieldFromInstruction32(Insn, 0, 12);
-    if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1;
+    int imm = fieldFromInstruction(Insn, 0, 12);
+    if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1;
     Inst.addOperand(MCOperand::CreateImm(imm));
     return S;
   }
-  unsigned addrmode = fieldFromInstruction32(Insn, 4, 2);
-  addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2;
-  addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6;
+  unsigned addrmode = fieldFromInstruction(Insn, 4, 2);
+  addrmode |= fieldFromInstruction(Insn, 0, 4) << 2;
+  addrmode |= fieldFromInstruction(Insn, 16, 4) << 6;
   if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3151,9 +3164,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
                                    uint64_t Address, const void *Decoder) {
-  int imm = Val & 0xFF;
-  if (!(Val & 0x100)) imm *= -1;
-  Inst.addOperand(MCOperand::CreateImm(imm << 2));
+  if (Val == 0)
+    Inst.addOperand(MCOperand::CreateImm(INT32_MIN));
+  else {
+    int imm = Val & 0xFF;
+
+    if (!(Val & 0x100)) imm *= -1;
+    Inst.addOperand(MCOperand::CreateImm(imm << 2));
+  }
   return MCDisassembler::Success;
 }
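The DecodeT2Imm8S4 change above introduces a sentinel: a raw value of 0 means U == 0 with a zero offset, i.e. "#-0", which must stay distinct from "#+0". A standalone restatement of the new decode logic (the round trip with the printer changes at the end of this patch is assumed, not shown here):

#include <cstdint>
#include <climits>

int32_t decodeT2Imm8S4(unsigned Val) { // Val = {U, imm8}, 9 bits
  if (Val == 0)
    return INT32_MIN;                  // U==0, imm8==0 -> "#-0" sentinel
  int imm = Val & 0xFF;
  if (!(Val & 0x100))                  // U bit clear -> subtract
    imm = -imm;
  return imm * 4;                      // offsets are scaled by 4
}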
@@ -3162,8 +3180,8 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
                                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 9, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 9);
+  unsigned Rn = fieldFromInstruction(Val, 9, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 9);
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3177,8 +3195,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
                                                 uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 8, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 8);
+  unsigned Rn = fieldFromInstruction(Val, 8, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 8);
   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3205,8 +3223,8 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
                                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 9, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 9);
+  unsigned Rn = fieldFromInstruction(Val, 9, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 9);
   // Some instructions always use an additive offset.
   switch (Inst.getOpcode()) {
@@ -3236,12 +3254,12 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned addr = fieldFromInstruction32(Insn, 0, 8);
-  addr |= fieldFromInstruction32(Insn, 9, 1) << 8;
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned addr = fieldFromInstruction(Insn, 0, 8);
+  addr |= fieldFromInstruction(Insn, 9, 1) << 8;
   addr |= Rn << 9;
-  unsigned load = fieldFromInstruction32(Insn, 20, 1);
+  unsigned load = fieldFromInstruction(Insn, 20, 1);
   if (!load) {
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3266,8 +3284,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
                                           uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 13, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 12);
+  unsigned Rn = fieldFromInstruction(Val, 13, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 12);
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3279,7 +3297,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
 static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
                                         uint64_t Address, const void *Decoder) {
-  unsigned imm = fieldFromInstruction16(Insn, 0, 7);
+  unsigned imm = fieldFromInstruction(Insn, 0, 7);
   Inst.addOperand(MCOperand::CreateReg(ARM::SP));
   Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3293,8 +3311,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
   DecodeStatus S = MCDisassembler::Success;
   if (Inst.getOpcode() == ARM::tADDrSP) {
-    unsigned Rdm = fieldFromInstruction16(Insn, 0, 3);
-    Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3;
+    unsigned Rdm = fieldFromInstruction(Insn, 0, 3);
+    Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
       return MCDisassembler::Fail;
@@ -3302,7 +3320,7 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
       return MCDisassembler::Fail;
   } else if (Inst.getOpcode() == ARM::tADDspr) {
-    unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
+    unsigned Rm = fieldFromInstruction(Insn, 3, 4);
     Inst.addOperand(MCOperand::CreateReg(ARM::SP));
     Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3315,8 +3333,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
                                    uint64_t Address, const void *Decoder) {
-  unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
-  unsigned flags = fieldFromInstruction16(Insn, 0, 3);
+  unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
+  unsigned flags = fieldFromInstruction(Insn, 0, 3);
   Inst.addOperand(MCOperand::CreateImm(imod));
   Inst.addOperand(MCOperand::CreateImm(flags));
@@ -3327,8 +3345,8 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
                                      uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned add = fieldFromInstruction32(Insn, 4, 1);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned add = fieldFromInstruction(Insn, 4, 1);
   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3375,8 +3393,8 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
                        uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
   if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3391,9 +3409,9 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned pred = fieldFromInstruction32(Insn, 22, 4);
+  unsigned pred = fieldFromInstruction(Insn, 22, 4);
   if (pred == 0xE || pred == 0xF) {
-    unsigned opc = fieldFromInstruction32(Insn, 4, 28);
+    unsigned opc = fieldFromInstruction(Insn, 4, 28);
     switch (opc) {
     default:
       return MCDisassembler::Fail;
@@ -3408,15 +3426,15 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
       break;
     }
-    unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+    unsigned imm = fieldFromInstruction(Insn, 0, 4);
     return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
   }
-  unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1;
-  brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19;
-  brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18;
-  brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12;
-  brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20;
+  unsigned brtarget = fieldFromInstruction(Insn, 0, 11) << 1;
+  brtarget |= fieldFromInstruction(Insn, 11, 1) << 19;
+  brtarget |= fieldFromInstruction(Insn, 13, 1) << 18;
+  brtarget |= fieldFromInstruction(Insn, 16, 6) << 12;
+  brtarget |= fieldFromInstruction(Insn, 26, 1) << 20;
   if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3431,10 +3449,10 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
 // a splat operation or a rotation.
 static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder) {
-  unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
+  unsigned ctrl = fieldFromInstruction(Val, 10, 2);
   if (ctrl == 0) {
-    unsigned byte = fieldFromInstruction32(Val, 8, 2);
-    unsigned imm = fieldFromInstruction32(Val, 0, 8);
+    unsigned byte = fieldFromInstruction(Val, 8, 2);
+    unsigned imm = fieldFromInstruction(Val, 0, 8);
     switch (byte) {
     case 0:
       Inst.addOperand(MCOperand::CreateImm(imm));
@@ -3451,8 +3469,8 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
       break;
     }
   } else {
-    unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80;
-    unsigned rot = fieldFromInstruction32(Val, 7, 5);
+    unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80;
+    unsigned rot = fieldFromInstruction(Val, 7, 5);
     unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
     Inst.addOperand(MCOperand::CreateImm(imm));
  }
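For DecodeT2SOImm above: ctrl == 0 selects one of four byte-splat patterns, anything else is a rotated 8-bit value whose top bit is forced. A worked rotation case using the field layout above:

// Val is the 12-bit Thumb2 modified-immediate encoding; this is the
// rotation path (ctrl != 0), restated outside the decoder.
unsigned decodeT2SOImmRotated(unsigned Val) {
  unsigned unrot = (Val & 0x7F) | 0x80;  // 8-bit value, top bit forced
  unsigned rot = (Val >> 7) & 0x1F;      // rotate-right amount
  return (unrot >> rot) | (unrot << ((32 - rot) & 31));
}
// decodeT2SOImmRotated(0x458) == 0xD8000000 (0xD8 rotated right by 8)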
@@ -3494,19 +3512,8 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
 static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
                                            uint64_t Address, const void *Decoder) {
-  switch (Val) {
-  default:
+  if (Val & ~0xf)
     return MCDisassembler::Fail;
-  case 0xF: // SY
-  case 0xE: // ST
-  case 0xB: // ISH
-  case 0xA: // ISHST
-  case 0x7: // NSH
-  case 0x6: // NSHST
-  case 0x3: // OSH
-  case 0x2: // OSHST
-    break;
-  }
   Inst.addOperand(MCOperand::CreateImm(Val));
   return MCDisassembler::Success;
@@ -3523,9 +3530,9 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
                                         uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   if ((Rt & 1) || Rt == 0xE || Rn == 0xF)
     return MCDisassembler::Fail;
@@ -3546,10 +3553,10 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder){
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3573,12 +3580,12 @@ static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
-  imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+  imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
@@ -3598,13 +3605,13 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
-  imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+  imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
   if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
   if (Rm == 0xF) S = MCDisassembler::SoftFail;
@@ -3626,12 +3633,12 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
-  imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+  imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
@@ -3651,12 +3658,12 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
-  imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+  imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
@@ -3676,11 +3683,11 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
   unsigned align = 0;
   unsigned index = 0;
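The VLDnLN/VSTnLN hunks that follow all decode the same index_align bits; only the per-size constants differ. A restatement of the DecodeVLD1LN case analysis from the switch in the next hunk, as a small helper (the UNDEFINED checks mirror the returns marked in the diff):

struct Vld1LaneInfo { unsigned index, align; };

// Returns false for encodings the decoder treats as UNDEFINED.
bool decodeVld1Lane(unsigned Insn, unsigned size, Vld1LaneInfo &Out) {
  Out = {0, 0};
  switch (size) {
  case 0:                               // 8-bit lanes
    if ((Insn >> 4) & 1) return false;  // UNDEFINED
    Out.index = (Insn >> 5) & 7;
    return true;
  case 1:                               // 16-bit lanes
    if ((Insn >> 5) & 1) return false;  // UNDEFINED
    Out.index = (Insn >> 6) & 3;
    if ((Insn >> 4) & 1) Out.align = 2;
    return true;
  case 2:                               // 32-bit lanes
    if ((Insn >> 6) & 1) return false;  // UNDEFINED
    Out.index = (Insn >> 7) & 1;
    if ((Insn >> 4) & 3) Out.align = 4;
    return true;
  default:
    return false;
  }
}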
@@ -3688,22 +3695,22 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
   default:
     return MCDisassembler::Fail;
   case 0:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
       return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 5, 3);
+    index = fieldFromInstruction(Insn, 5, 3);
     break;
   case 1:
-    if (fieldFromInstruction32(Insn, 5, 1))
+    if (fieldFromInstruction(Insn, 5, 1))
       return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 6, 2);
-    if (fieldFromInstruction32(Insn, 4, 1))
+    index = fieldFromInstruction(Insn, 6, 2);
+    if (fieldFromInstruction(Insn, 4, 1))
       align = 2;
     break;
   case 2:
-    if (fieldFromInstruction32(Insn, 6, 1))
+    if (fieldFromInstruction(Insn, 6, 1))
       return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 7, 1);
-    if (fieldFromInstruction32(Insn, 4, 2) != 0)
+    index = fieldFromInstruction(Insn, 7, 1);
+    if (fieldFromInstruction(Insn, 4, 2) != 0)
       align = 4;
   }
@@ -3735,11 +3742,11 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
   unsigned align = 0;
   unsigned index = 0;
@@ -3747,22 +3754,22 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
   default:
     return MCDisassembler::Fail;
   case 0:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
      return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 5, 3);
+    index = fieldFromInstruction(Insn, 5, 3);
    break;
   case 1:
-    if (fieldFromInstruction32(Insn, 5, 1))
+    if (fieldFromInstruction(Insn, 5, 1))
       return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 6, 2);
-    if (fieldFromInstruction32(Insn, 4, 1))
+    index = fieldFromInstruction(Insn, 6, 2);
+    if (fieldFromInstruction(Insn, 4, 1))
       align = 2;
     break;
   case 2:
-    if (fieldFromInstruction32(Insn, 6, 1))
+    if (fieldFromInstruction(Insn, 6, 1))
       return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 7, 1);
-    if (fieldFromInstruction32(Insn, 4, 2) != 0)
+    index = fieldFromInstruction(Insn, 7, 1);
+    if (fieldFromInstruction(Insn, 4, 2) != 0)
       align = 4;
   }
@@ -3793,11 +3800,11 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
   unsigned align = 0;
   unsigned index = 0;
@@ -3806,24 +3813,24 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
   default:
     return MCDisassembler::Fail;
   case 0:
-    index = fieldFromInstruction32(Insn, 5, 3);
-    if (fieldFromInstruction32(Insn, 4, 1))
+    index = fieldFromInstruction(Insn, 5, 3);
+    if (fieldFromInstruction(Insn, 4, 1))
       align = 2;
     break;
   case 1:
-    index = fieldFromInstruction32(Insn, 6, 2);
-    if (fieldFromInstruction32(Insn, 4, 1))
+    index = fieldFromInstruction(Insn, 6, 2);
+    if (fieldFromInstruction(Insn, 4, 1))
       align = 4;
-    if (fieldFromInstruction32(Insn, 5, 1))
+    if (fieldFromInstruction(Insn, 5, 1))
       inc = 2;
     break;
   case 2:
-    if (fieldFromInstruction32(Insn, 5, 1))
+    if (fieldFromInstruction(Insn, 5, 1))
       return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 7, 1);
-    if (fieldFromInstruction32(Insn, 4, 1) != 0)
+    index = fieldFromInstruction(Insn, 7, 1);
+    if (fieldFromInstruction(Insn, 4, 1) != 0)
       align = 8;
-    if (fieldFromInstruction32(Insn, 6, 1))
+    if (fieldFromInstruction(Insn, 6, 1))
       inc = 2;
     break;
   }
@@ -3860,11 +3867,11 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
   unsigned align = 0;
   unsigned index = 0;
@@ -3873,24 +3880,24 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
   default:
     return MCDisassembler::Fail;
   case 0:
-    index = fieldFromInstruction32(Insn, 5, 3);
-    if (fieldFromInstruction32(Insn, 4, 1))
+    index = fieldFromInstruction(Insn, 5, 3);
+    if (fieldFromInstruction(Insn, 4, 1))
      align = 2;
    break;
   case 1:
-    index = fieldFromInstruction32(Insn, 6, 2);
-    if (fieldFromInstruction32(Insn, 4, 1))
+    index = fieldFromInstruction(Insn, 6, 2);
+    if (fieldFromInstruction(Insn, 4, 1))
      align = 4;
-    if (fieldFromInstruction32(Insn, 5, 1))
+    if (fieldFromInstruction(Insn, 5, 1))
      inc = 2;
    break;
   case 2:
-    if (fieldFromInstruction32(Insn, 5, 1))
+    if (fieldFromInstruction(Insn, 5, 1))
      return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 7, 1);
-    if (fieldFromInstruction32(Insn, 4, 1) != 0)
+    index = fieldFromInstruction(Insn, 7, 1);
+    if (fieldFromInstruction(Insn, 4, 1) != 0)
      align = 8;
-    if (fieldFromInstruction32(Insn, 6, 1))
+    if (fieldFromInstruction(Insn, 6, 1))
      inc = 2;
    break;
   }
@@ -3924,11 +3931,11 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
   unsigned align = 0;
   unsigned index = 0;
@@ -3937,22 +3944,22 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
   default:
     return MCDisassembler::Fail;
   case 0:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
      return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 5, 3);
+    index = fieldFromInstruction(Insn, 5, 3);
    break;
   case 1:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
      return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 6, 2);
-    if (fieldFromInstruction32(Insn, 5, 1))
+    index = fieldFromInstruction(Insn, 6, 2);
+    if (fieldFromInstruction(Insn, 5, 1))
      inc = 2;
    break;
   case 2:
-    if (fieldFromInstruction32(Insn, 4, 2))
+    if (fieldFromInstruction(Insn, 4, 2))
      return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 7, 1);
-    if (fieldFromInstruction32(Insn, 6, 1))
+    index = fieldFromInstruction(Insn, 7, 1);
+    if (fieldFromInstruction(Insn, 6, 1))
      inc = 2;
    break;
   }
@@ -3994,11 +4001,11 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
   unsigned align = 0;
   unsigned index = 0;
@@ -4007,22 +4014,22 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
   default:
     return MCDisassembler::Fail;
   case 0:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
      return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 5, 3);
+    index = fieldFromInstruction(Insn, 5, 3);
    break;
   case 1:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
      return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 6, 2);
-    if (fieldFromInstruction32(Insn, 5, 1))
+    index = fieldFromInstruction(Insn, 6, 2);
+    if (fieldFromInstruction(Insn, 5, 1))
      inc = 2;
    break;
   case 2:
-    if (fieldFromInstruction32(Insn, 4, 2))
+    if (fieldFromInstruction(Insn, 4, 2))
      return MCDisassembler::Fail; // UNDEFINED
-    index = fieldFromInstruction32(Insn, 7, 1);
-    if (fieldFromInstruction32(Insn, 6, 1))
+    index = fieldFromInstruction(Insn, 7, 1);
+    if (fieldFromInstruction(Insn, 6, 1))
      inc = 2;
    break;
   }
@@ -4058,11 +4065,11 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
   unsigned align = 0;
   unsigned index = 0;
@@ -4071,22 +4078,22 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
   default:
     return MCDisassembler::Fail;
   case 0:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
      align = 4;
-    index = fieldFromInstruction32(Insn, 5, 3);
+    index = fieldFromInstruction(Insn, 5, 3);
    break;
   case 1:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
      align = 8;
-    index = fieldFromInstruction32(Insn, 6, 2);
-    if (fieldFromInstruction32(Insn, 5, 1))
+    index = fieldFromInstruction(Insn, 6, 2);
+    if (fieldFromInstruction(Insn, 5, 1))
      inc = 2;
    break;
   case 2:
-    if (fieldFromInstruction32(Insn, 4, 2))
-      align = 4 << fieldFromInstruction32(Insn, 4, 2);
-    index = fieldFromInstruction32(Insn, 7, 1);
-    if (fieldFromInstruction32(Insn, 6, 1))
+    if (fieldFromInstruction(Insn, 4, 2))
+      align = 4 << fieldFromInstruction(Insn, 4, 2);
+    index = fieldFromInstruction(Insn, 7, 1);
+    if (fieldFromInstruction(Insn, 6, 1))
      inc = 2;
    break;
   }
@@ -4132,11 +4139,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
   unsigned align = 0;
   unsigned index = 0;
@@ -4145,22 +4152,22 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
   default:
     return MCDisassembler::Fail;
   case 0:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
      align = 4;
-    index = fieldFromInstruction32(Insn, 5, 3);
+    index = fieldFromInstruction(Insn, 5, 3);
    break;
   case 1:
-    if (fieldFromInstruction32(Insn, 4, 1))
+    if (fieldFromInstruction(Insn, 4, 1))
      align = 8;
-    index = fieldFromInstruction32(Insn, 6, 2);
-    if (fieldFromInstruction32(Insn, 5, 1))
+    index = fieldFromInstruction(Insn, 6, 2);
+    if (fieldFromInstruction(Insn, 5, 1))
      inc = 2;
    break;
   case 2:
-    if (fieldFromInstruction32(Insn, 4, 2))
-      align = 4 << fieldFromInstruction32(Insn, 4, 2);
-    index = fieldFromInstruction32(Insn, 7, 1);
-    if (fieldFromInstruction32(Insn, 6, 1))
+    if (fieldFromInstruction(Insn, 4, 2))
+      align = 4 << fieldFromInstruction(Insn, 4, 2);
+    index = fieldFromInstruction(Insn, 7, 1);
+    if (fieldFromInstruction(Insn, 6, 1))
      inc = 2;
    break;
   }
@@ -4196,11 +4203,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
   if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
     S = MCDisassembler::SoftFail;
@@ -4222,11 +4229,11 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
   if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
     S = MCDisassembler::SoftFail;
@@ -4248,8 +4255,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned pred = fieldFromInstruction16(Insn, 4, 4);
-  unsigned mask = fieldFromInstruction16(Insn, 0, 4);
+  unsigned pred = fieldFromInstruction(Insn, 4, 4);
+  unsigned mask = fieldFromInstruction(Insn, 0, 4);
   if (pred == 0xF) {
     pred = 0xE;
@@ -4271,13 +4278,13 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned addr = fieldFromInstruction32(Insn, 0, 8);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
-  unsigned U = fieldFromInstruction32(Insn, 23, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned addr = fieldFromInstruction(Insn, 0, 8);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);
+  unsigned U = fieldFromInstruction(Insn, 23, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
   bool writeback = (W == 1) | (P == 0);
   addr |= (U << 8) | (Rn << 9);
@@ -4308,13 +4315,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned addr = fieldFromInstruction32(Insn, 0, 8);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
-  unsigned U = fieldFromInstruction32(Insn, 23, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned addr = fieldFromInstruction(Insn, 0, 8);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);
+  unsigned U = fieldFromInstruction(Insn, 23, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
   bool writeback = (W == 1) | (P == 0);
   addr |= (U << 8) | (Rn << 9);
@@ -4340,13 +4347,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
                                 uint64_t Address, const void *Decoder) {
-  unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
-  unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
+  unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
+  unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
   if (sign1 != sign2) return MCDisassembler::Fail;
-  unsigned Val = fieldFromInstruction32(Insn, 0, 8);
-  Val |= fieldFromInstruction32(Insn, 12, 3) << 8;
-  Val |= fieldFromInstruction32(Insn, 26, 1) << 11;
+  unsigned Val = fieldFromInstruction(Insn, 0, 8);
+  Val |= fieldFromInstruction(Insn, 12, 3) << 8;
+  Val |= fieldFromInstruction(Insn, 26, 1) << 11;
   Val |= sign1 << 12;
   Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val)));
@@ -4366,10 +4373,10 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
 static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void *Decoder) {
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   if (pred == 0xF)
     return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
@@ -4393,12 +4400,12 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder) {
-  unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
-  Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
-  unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
-  Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
-  unsigned imm = fieldFromInstruction32(Insn, 16, 6);
-  unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+  unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+  Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+  unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+  Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+  unsigned imm = fieldFromInstruction(Insn, 16, 6);
+  unsigned cmode = fieldFromInstruction(Insn, 8, 4);
   DecodeStatus S = MCDisassembler::Success;
@@ -4421,12 +4428,12 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder) {
-  unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
-  Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
-  unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
-  Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
-  unsigned imm = fieldFromInstruction32(Insn, 16, 6);
-  unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+  unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+  Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+  unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+  Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+  unsigned imm = fieldFromInstruction(Insn, 16, 6);
+  unsigned cmode = fieldFromInstruction(Insn, 8, 4);
   DecodeStatus S = MCDisassembler::Success;
@@ -4451,13 +4458,13 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
                               uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rn = fieldFromInstruction32(Val, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Val, 12, 4);
-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  Rm |= (fieldFromInstruction32(Val, 23, 1) << 4);
-  unsigned Cond = fieldFromInstruction32(Val, 28, 4);
+  unsigned Rn = fieldFromInstruction(Val, 16, 4);
+  unsigned Rt = fieldFromInstruction(Val, 12, 4);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  Rm |= (fieldFromInstruction(Val, 23, 1) << 4);
+  unsigned Cond = fieldFromInstruction(Val, 28, 4);
-  if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt)
+  if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt)
     S = MCDisassembler::SoftFail;
   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
unsigned Val, DecodeStatus S = MCDisassembler::Success; - unsigned CRm = fieldFromInstruction32(Val, 0, 4); - unsigned opc1 = fieldFromInstruction32(Val, 4, 4); - unsigned cop = fieldFromInstruction32(Val, 8, 4); - unsigned Rt = fieldFromInstruction32(Val, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Val, 16, 4); + unsigned CRm = fieldFromInstruction(Val, 0, 4); + unsigned opc1 = fieldFromInstruction(Val, 4, 4); + unsigned cop = fieldFromInstruction(Val, 8, 4); + unsigned Rt = fieldFromInstruction(Val, 12, 4); + unsigned Rt2 = fieldFromInstruction(Val, 16, 4); if ((cop & ~0x1) == 0xa) return MCDisassembler::Fail; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2f6b1b0..8b9109e 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -792,6 +792,25 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } +void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + if (MO.isExpr()) { + O << *MO.getExpr(); + return; + } + + int32_t OffImm = (int32_t)MO.getImm(); + + if (OffImm == INT32_MIN) + O << "#-0"; + else if (OffImm < 0) + O << "#-" << -OffImm; + else + O << "#" << OffImm; +} + void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "#" << MI->getOperand(OpNum).getImm() * 4; @@ -953,12 +972,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, O << "[" << getRegisterName(MO1.getReg()); - int32_t OffImm = (int32_t)MO2.getImm() / 4; + int32_t OffImm = (int32_t)MO2.getImm(); + + assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); + // Don't print +0. - if (OffImm < 0) - O << ", #-" << -OffImm * 4; + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) + O << ", #-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm * 4; + O << ", #" << OffImm; O << "]"; } @@ -990,15 +1014,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - int32_t OffImm = (int32_t)MO1.getImm() / 4; + int32_t OffImm = (int32_t)MO1.getImm(); + + assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); + // Don't print +0. 
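// --- Editorial sketch (not part of the patch): the printers above stop
// pre-scaling the immediate and adopt INT32_MIN as a sentinel for "negative
// zero", so a subtracting encoding with a zero offset round-trips as "#-0".
// Standalone illustration (printImmOffset is an invented name):
#include <climits>
#include <cstdint>
#include <ostream>

static void printImmOffset(std::ostream &OS, int32_t OffImm) {
  if (OffImm == INT32_MIN)
    OS << "#-0";            // U=0 with a zero offset
  else if (OffImm < 0)
    OS << "#-" << -OffImm;
  else
    OS << "#" << OffImm;
}
// --- end sketch ---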
- if (OffImm != 0) { - O << ", "; - if (OffImm < 0) - O << "#-" << -OffImm * 4; - else if (OffImm > 0) - O << "#" << OffImm * 4; - } + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 8acb7ee..73d7bfd 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -73,6 +73,7 @@ public: void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index ae11be8..de48a0e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -120,14 +120,22 @@ namespace ARM_MB { // The Memory Barrier Option constants map directly to the 4-bit encoding of // the option field for memory barrier operations. enum MemBOpt { - SY = 15, - ST = 14, - ISH = 11, - ISHST = 10, - NSH = 7, - NSHST = 6, + RESERVED_0 = 0, + RESERVED_1 = 1, + OSHST = 2, OSH = 3, - OSHST = 2 + RESERVED_4 = 4, + RESERVED_5 = 5, + NSHST = 6, + NSH = 7, + RESERVED_8 = 8, + RESERVED_9 = 9, + ISHST = 10, + ISH = 11, + RESERVED_12 = 12, + RESERVED_13 = 13, + ST = 14, + SY = 15 }; inline static const char *MemBOptToString(unsigned val) { @@ -135,92 +143,24 @@ namespace ARM_MB { default: llvm_unreachable("Unknown memory operation"); case SY: return "sy"; case ST: return "st"; + case RESERVED_13: return "#0xd"; + case RESERVED_12: return "#0xc"; case ISH: return "ish"; case ISHST: return "ishst"; + case RESERVED_9: return "#0x9"; + case RESERVED_8: return "#0x8"; case NSH: return "nsh"; case NSHST: return "nshst"; + case RESERVED_5: return "#0x5"; + case RESERVED_4: return "#0x4"; case OSH: return "osh"; case OSHST: return "oshst"; + case RESERVED_1: return "#0x1"; + case RESERVED_0: return "#0x0"; } } } // namespace ARM_MB -/// getARMRegisterNumbering - Given the enum value for some register, e.g. -/// ARM::LR, return the number that it corresponds to (e.g. 14). 
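// --- Editorial sketch (not part of the patch): with all sixteen 4-bit
// encodings enumerated above, every DMB/DSB operand becomes printable; the
// reserved slots fall back to a raw hex spelling. A table-driven equivalent
// of MemBOptToString (memBarrierOptToString is an invented name):
#include <cassert>

static const char *memBarrierOptToString(unsigned Val) {
  static const char *const Names[16] = {
      "#0x0", "#0x1", "oshst", "osh", "#0x4", "#0x5", "nshst", "nsh",
      "#0x8", "#0x9", "ishst", "ish", "#0xc", "#0xd", "st",    "sy"};
  assert(Val < 16 && "memory barrier option is a 4-bit field");
  return Names[Val];
}
// --- end sketch ---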
-inline static unsigned getARMRegisterNumbering(unsigned Reg) { - using namespace ARM; - switch (Reg) { - default: - llvm_unreachable("Unknown ARM register!"); - case R0: case S0: case D0: case Q0: return 0; - case R1: case S1: case D1: case Q1: return 1; - case R2: case S2: case D2: case Q2: return 2; - case R3: case S3: case D3: case Q3: return 3; - case R4: case S4: case D4: case Q4: return 4; - case R5: case S5: case D5: case Q5: return 5; - case R6: case S6: case D6: case Q6: return 6; - case R7: case S7: case D7: case Q7: return 7; - case R8: case S8: case D8: case Q8: return 8; - case R9: case S9: case D9: case Q9: return 9; - case R10: case S10: case D10: case Q10: return 10; - case R11: case S11: case D11: case Q11: return 11; - case R12: case S12: case D12: case Q12: return 12; - case SP: case S13: case D13: case Q13: return 13; - case LR: case S14: case D14: case Q14: return 14; - case PC: case S15: case D15: case Q15: return 15; - - case S16: case D16: return 16; - case S17: case D17: return 17; - case S18: case D18: return 18; - case S19: case D19: return 19; - case S20: case D20: return 20; - case S21: case D21: return 21; - case S22: case D22: return 22; - case S23: case D23: return 23; - case S24: case D24: return 24; - case S25: case D25: return 25; - case S26: case D26: return 26; - case S27: case D27: return 27; - case S28: case D28: return 28; - case S29: case D29: return 29; - case S30: case D30: return 30; - case S31: case D31: return 31; - - // Composite registers use the regnum of the first register in the list. - /* Q0 */ case D0_D2: return 0; - case D1_D2: case D1_D3: return 1; - /* Q1 */ case D2_D4: return 2; - case D3_D4: case D3_D5: return 3; - /* Q2 */ case D4_D6: return 4; - case D5_D6: case D5_D7: return 5; - /* Q3 */ case D6_D8: return 6; - case D7_D8: case D7_D9: return 7; - /* Q4 */ case D8_D10: return 8; - case D9_D10: case D9_D11: return 9; - /* Q5 */ case D10_D12: return 10; - case D11_D12: case D11_D13: return 11; - /* Q6 */ case D12_D14: return 12; - case D13_D14: case D13_D15: return 13; - /* Q7 */ case D14_D16: return 14; - case D15_D16: case D15_D17: return 15; - /* Q8 */ case D16_D18: return 16; - case D17_D18: case D17_D19: return 17; - /* Q9 */ case D18_D20: return 18; - case D19_D20: case D19_D21: return 19; - /* Q10 */ case D20_D22: return 20; - case D21_D22: case D21_D23: return 21; - /* Q11 */ case D22_D24: return 22; - case D23_D24: case D23_D25: return 23; - /* Q12 */ case D24_D26: return 24; - case D25_D26: case D25_D27: return 25; - /* Q13 */ case D26_D28: return 26; - case D27_D28: case D27_D29: return 27; - /* Q14 */ case D28_D30: return 28; - case D29_D30: case D29_D31: return 29; - /* Q15 */ - } -} - /// isARMLowRegister - Returns true if the register is a low register (r0-r7). 
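// --- Editorial sketch (not part of the patch): the hand-maintained switch
// deleted above is superseded by MCRegisterInfo::getEncodingValue(), which
// indexes a TableGen-emitted per-register hardware-encoding table. In
// outline (these types are stand-ins, not the LLVM classes):
#include <cassert>
#include <cstdint>

struct EncodingTableSketch {
  const uint8_t *HWEncodings; // one hardware encoding per register enum value
  unsigned NumRegs;

  unsigned getEncodingValue(unsigned Reg) const {
    assert(Reg < NumRegs && "not a register enum value");
    return HWEncodings[Reg];  // e.g. the entry for ARM::LR would hold 14
  }
};
// --- end sketch ---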
/// static inline bool isARMLowRegister(unsigned Reg) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 1964bcd..94f1082 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -18,6 +18,7 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -38,11 +39,12 @@ class ARMMCCodeEmitter : public MCCodeEmitter { void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT const MCInstrInfo &MCII; const MCSubtargetInfo &STI; + const MCContext &CTX; public: ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), STI(sti) { + : MCII(mcii), STI(sti), CTX(ctx) { } ~ARMMCCodeEmitter() {} @@ -405,7 +407,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { if (MO.isReg()) { unsigned Reg = MO.getReg(); - unsigned RegNo = getARMRegisterNumbering(Reg); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); // Q registers are encoded as 2x their register number. switch (Reg) { @@ -434,7 +436,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - Reg = getARMRegisterNumbering(MO.getReg()); + Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); int32_t SImm = MO1.getImm(); bool isAdd = true; @@ -641,8 +643,8 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, return Val; } -/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label -/// target. +/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate +/// ADR label target. uint32_t ARMMCCodeEmitter:: getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { @@ -652,15 +654,23 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, Fixups); int32_t offset = MO.getImm(); uint32_t Val = 0x2000; - if (offset < 0) { + + if (offset == INT32_MIN) { + Val = 0x1000; + offset = 0; + } else if (offset < 0) { Val = 0x1000; offset *= -1; } - Val |= offset; + + int SoImmVal = ARM_AM::getSOImmVal(offset); + assert(SoImmVal != -1 && "Not a valid so_imm value!"); + + Val |= SoImmVal; return Val; } -/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label +/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label /// target. uint32_t ARMMCCodeEmitter:: getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, @@ -670,14 +680,16 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12, Fixups); int32_t Val = MO.getImm(); - if (Val < 0) { + if (Val == INT32_MIN) + Val = 0x1000; + else if (Val < 0) { Val *= -1; Val |= 0x1000; } return Val; } -/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label +/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label /// target. 
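// --- Editorial sketch (not part of the patch): getAdrLabelOpValue above now
// funnels the ADR offset through ARM_AM::getSOImmVal, i.e. ARM's modified
// immediate form: an 8-bit value rotated right by an even amount. A
// standalone search with the same 12-bit result layout (the name is
// invented, and the real helper also canonicalizes the rotation choice):
#include <cstdint>

static int getSOImmValSketch(uint32_t Imm) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Undo a rotate-right-by-Rot encoding by rotating left by Rot.
    uint32_t Rotated = Rot ? ((Imm << Rot) | (Imm >> (32 - Rot))) : Imm;
    if (Rotated <= 0xFF)
      return (int)(((Rot / 2) << 8) | Rotated); // {11-8}=rot/2, {7-0}=imm8
  }
  return -1; // not representable as an 8-bit rotated immediate
}
// --- end sketch ---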
uint32_t ARMMCCodeEmitter:: getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, @@ -699,8 +711,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO1 = MI.getOperand(OpIdx); const MCOperand &MO2 = MI.getOperand(OpIdx + 1); - unsigned Rn = getARMRegisterNumbering(MO1.getReg()); - unsigned Rm = getARMRegisterNumbering(MO2.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); return (Rm << 3) | Rn; } @@ -716,7 +728,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm12 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -796,7 +808,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -832,7 +844,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // {7-0} = imm8 const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm8 = MO1.getImm(); return (Reg << 8) | Imm8; } @@ -915,8 +927,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); - unsigned Rm = getARMRegisterNumbering(MO1.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()); bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add; ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm()); @@ -946,7 +958,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, // {12} isAdd // {11-0} imm12/Rm const MCOperand &MO = MI.getOperand(OpIdx); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups); Binary |= Rn << 14; return Binary; @@ -969,7 +981,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm); Binary <<= 7; // Shift amount is bits [11:7] Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5] - Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0] + Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0] } return Binary | (isAdd << 12) | (isReg << 13); } @@ -982,7 +994,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); bool isAdd = MO1.getImm() != 0; - return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4); } uint32_t 
ARMMCCodeEmitter:: @@ -1000,7 +1012,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = getARMRegisterNumbering(MO.getReg()); + Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); return Imm8 | (isAdd << 8) | (isImm << 9); } @@ -1018,7 +1030,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. if (!MO.isReg()) { - unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC. + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); @@ -1028,14 +1040,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumCPRelocations; return (Rn << 9) | (1 << 13); } - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; bool isImm = MO1.getReg() == 0; uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = getARMRegisterNumbering(MO1.getReg()); + Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13); } @@ -1063,7 +1075,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm5 = MO1.getImm(); return ((Imm5 & 0x1f) << 3) | Rn; } @@ -1090,7 +1102,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false; // 'U' bit is handled as part of the fixup. @@ -1136,7 +1148,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1161,7 +1173,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, // Encode the shift operation Rs. // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); } unsigned ARMMCCodeEmitter:: @@ -1180,7 +1192,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1219,9 +1231,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, // Encoded as [Rn, Rm, imm]. // FIXME: Needs fixup support. 
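// --- Editorial sketch (not part of the patch): the addrmode3 encoder above
// packs one operand group into a single word: {13}=isImm, {12-9}=Rn,
// {8}=isAdd, {7-0}=imm8 or Rm. The same packing, standalone (invented name):
#include <cstdint>

static uint32_t packAddrMode3(unsigned Rn, bool IsAdd, bool IsImm,
                              unsigned Imm8OrRm) {
  return (Rn << 9) | (uint32_t(IsImm) << 13) | (uint32_t(IsAdd) << 8) |
         (Imm8OrRm & 0xFF);
}
// --- end sketch ---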
- unsigned Value = getARMRegisterNumbering(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); Value <<= 4; - Value |= getARMRegisterNumbering(MO2.getReg()); + Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); Value <<= 2; Value |= MO3.getImm(); @@ -1235,7 +1247,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, const MCOperand &MO2 = MI.getOperand(OpNum+1); // FIXME: Needs fixup support. - unsigned Value = getARMRegisterNumbering(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); // Even though the immediate is 8 bits long, we need 9 bits in order // to represent the (inverse of the) sign bit. @@ -1297,7 +1309,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1353,7 +1365,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, if (SPRRegs || DPRRegs) { // VLDM/VSTM - unsigned RegNo = getARMRegisterNumbering(Reg); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff; Binary |= (RegNo & 0x1f) << 8; if (SPRRegs) @@ -1362,7 +1374,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, Binary |= NumRegs * 2; } else { for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } @@ -1378,7 +1390,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1401,7 +1413,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1427,7 +1439,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1446,7 +1458,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return getARMRegisterNumbering(MO.getReg()); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 78faf59..a51e0fa 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -408,15 +408,22 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. 
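// --- Editorial sketch (not part of the patch): getRegisterListOpValue above
// produces two list shapes: VLDM/VSTM encode a base register plus a count,
// while GPR lists are a bitmask with one bit per hardware encoding. The
// bitmask half, standalone (invented name):
#include <cstdint>
#include <vector>

static uint32_t encodeGPRListSketch(const std::vector<unsigned> &Encodings) {
  uint32_t Mask = 0;
  for (unsigned RegNo : Encodings) // RegNo is the hardware encoding, 0..15
    Mask |= 1u << RegNo;
  return Mask;                     // e.g. {r0, r4, lr} -> 0x4011
}
// --- end sketch ---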
if (Type == macho::RIT_ARM_Half) { - // The other-half value only gets populated for the movt relocation. + // The other-half value only gets populated for the movt and movw + // relocation entries. uint32_t Value = 0;; switch ((unsigned)Fixup.getKind()) { default: break; + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + Value = (FixedValue >> 16) & 0xffff; + break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - Value = FixedValue; + Value = FixedValue & 0xffff; break; } macho::RelocationEntry MREPair; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 2097bb9..e9e20dd 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -563,48 +563,6 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } -/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the -/// two-addrss instruction inserted by two-address pass. -void -Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, - MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const { - if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill()) - return; - - unsigned PredReg = 0; - ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg); - if (CC == ARMCC::AL || PredReg != ARM::CPSR) - return; - - // Schedule the copy so it doesn't come between previous instructions - // and UseMI which can form an IT block. - unsigned SrcReg = SrcMI->getOperand(1).getReg(); - ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); - MachineBasicBlock *MBB = UseMI->getParent(); - MachineBasicBlock::iterator MBBI = SrcMI; - unsigned NumInsts = 0; - while (--MBBI != MBB->begin()) { - if (MBBI->isDebugValue()) - continue; - - MachineInstr *NMI = &*MBBI; - ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg); - if (!(NCC == CC || NCC == OCC) || - NMI->modifiesRegister(SrcReg, &TRI) || - NMI->modifiesRegister(ARM::CPSR, &TRI)) - break; - if (++NumInsts == 4) - // Too many in a row! - return; - } - - if (NumInsts) { - MBB->remove(SrcMI); - MBB->insert(++MBBI, SrcMI); - } -} - ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { unsigned Opc = MI->getOpcode(); diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 0911f8a..2cdcd06 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -57,11 +57,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the - /// two-addrss instruction inserted by two-address pass. - void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const; - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
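// --- Editorial sketch (not part of the patch): the MachO change above makes
// each half-relocation carry the other half of the 32-bit target, so a movw
// entry now stores the high 16 bits and a movt entry the low 16 bits.
// Standalone illustration (invented names):
#include <cstdint>

static uint16_t otherHalfForMovw(uint32_t FixedValue) {
  return (uint16_t)(FixedValue >> 16); // movw encodes lo16; pair carries hi16
}
static uint16_t otherHalfForMovt(uint32_t FixedValue) {
  return (uint16_t)FixedValue;         // movt encodes hi16; pair carries lo16
}
// --- end sketch ---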
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index c8e757b..4ddcd38 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -285,14 +285,14 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { Out << "GlobalValue::LinkerPrivateLinkage"; break; case GlobalValue::LinkerPrivateWeakLinkage: Out << "GlobalValue::LinkerPrivateWeakLinkage"; break; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: - Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break; case GlobalValue::AvailableExternallyLinkage: Out << "GlobalValue::AvailableExternallyLinkage "; break; case GlobalValue::LinkOnceAnyLinkage: Out << "GlobalValue::LinkOnceAnyLinkage "; break; case GlobalValue::LinkOnceODRLinkage: Out << "GlobalValue::LinkOnceODRLinkage "; break; + case GlobalValue::LinkOnceODRAutoHideLinkage: + Out << "GlobalValue::LinkOnceODRAutoHideLinkage"; break; case GlobalValue::WeakAnyLinkage: Out << "GlobalValue::WeakAnyLinkage"; break; case GlobalValue::WeakODRLinkage: diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 1357cc5..d756aec 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -328,7 +328,10 @@ CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const { // can get a useful trip count. The trip count can // be either a register or an immediate. The location // of the value depends upon the type (reg or imm). - while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + for (MachineRegisterInfo::reg_iterator + RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); + RI != RE; ++RI) { + IV_Opnd = &RI.getOperand(); const MachineInstr *MI = IV_Opnd->getParent(); if (L->contains(MI) && isCompareEqualsImm(MI)) { const MachineOperand &MO = MI->getOperand(2); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index c7be5ce..c0c0df6 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -2580,22 +2580,16 @@ let isCall = 1, neverHasSideEffects = 1, } // Tail Calls. 
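// --- Editorial sketch (not part of the patch): the Hexagon hardware-loops
// change above drops the removed getNextOperandForReg() chaining in favour
// of MachineRegisterInfo's reg_iterator, which visits every operand that
// mentions a given register. The shape of that traversal, with stand-in
// types rather than the LLVM API:
#include <vector>

struct OperandSketch { struct InstrSketch *Parent = nullptr; };
struct UseListSketch { std::vector<OperandSketch *> Operands; };

template <typename Visitor>
static void forEachRegOperand(const UseListSketch &Uses, Visitor Visit) {
  for (OperandSketch *Op : Uses.Operands)
    Visit(*Op); // the caller inspects Op->Parent, e.g. for a compare-to-imm
}
// --- end sketch ---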
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst // TAILCALL", []>; } -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst // TAILCALL", []>; } -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { def TCRETURNR : JInst<(outs), (ins IntRegs:$dst), "jumpr $dst // TAILCALL", []>; } diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 5d087db..4bacb8f 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -40,28 +40,27 @@ EnableIEEERndNear( HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): HexagonGenSubtargetInfo(TT, CPU, FS), - HexagonArchVersion(V2), CPUString(CPU.str()) { - ParseSubtargetFeatures(CPU, FS); - switch(HexagonArchVersion) { - case HexagonSubtarget::V2: - break; - case HexagonSubtarget::V3: - EnableV3 = true; - break; - case HexagonSubtarget::V4: - break; - case HexagonSubtarget::V5: - break; - default: - // If the programmer has not specified a Hexagon version, default - // to -mv4. + // If the programmer has not specified a Hexagon version, default to -mv4. + if (CPUString.empty()) CPUString = "hexagonv4"; - HexagonArchVersion = HexagonSubtarget::V4; - break; + + if (CPUString == "hexagonv2") { + HexagonArchVersion = V2; + } else if (CPUString == "hexagonv3") { + EnableV3 = true; + HexagonArchVersion = V3; + } else if (CPUString == "hexagonv4") { + HexagonArchVersion = V4; + } else if (CPUString == "hexagonv5") { + HexagonArchVersion = V5; + } else { + llvm_unreachable("Unrecognized Hexagon processor version"); } + ParseSubtargetFeatures(CPUString, FS); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 786a0c5..05f6fa6 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -183,8 +183,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, ManglerPrefixTy PrefixTy = Mangler::Default; if (GV->hasPrivateLinkage() || isImplicitlyPrivate) PrefixTy = Mangler::Private; - else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() || - GV->hasLinkerPrivateWeakDefAutoLinkage()) + else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage()) PrefixTy = Mangler::LinkerPrivate; // If this global has a name, handle it simply. diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt index 6c7343b..28f5219 100644 --- a/lib/Target/Mips/AsmParser/CMakeLists.txt +++ b/lib/Target/Mips/AsmParser/CMakeLists.txt @@ -1,3 +1,4 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMMipsAsmParser MipsAsmParser.cpp ) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 58b5590..43bd345 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -11,11 +11,20 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; namespace { class MipsAsmParser : public MCTargetAsmParser { + +#define GET_ASSEMBLER_HEADER +#include "MipsGenAsmMatcher.inc" + bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); @@ -23,10 +32,11 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); bool ParseInstruction(StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); + SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); + OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser() { @@ -35,6 +45,57 @@ public: }; } +namespace { + +/// MipsOperand - Instances of this class represent a parsed Mips machine +/// instruction. +class MipsOperand : public MCParsedAsmOperand { + enum KindTy { + k_CondCode, + k_CoprocNum, + k_Immediate, + k_Memory, + k_PostIndexRegister, + k_Register, + k_Token + } Kind; + + MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} +public: + void addRegOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("unimplemented!"); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const{ + llvm_unreachable("unimplemented!"); + } + void addImmOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("unimplemented!"); + } + void addMemOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("unimplemented!"); + } + + bool isReg() const { return Kind == k_Register; } + bool isImm() const { return Kind == k_Immediate; } + bool isToken() const { return Kind == k_Token; } + bool isMem() const { return Kind == k_Memory; } + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return ""; + } + + unsigned getReg() const { + assert((Kind == k_Register) && "Invalid access!"); + return 0; + } + + virtual void print(raw_ostream &OS) const { + llvm_unreachable("unimplemented!"); + } +}; +} + bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, @@ -58,6 +119,11 @@ ParseDirective(AsmToken DirectiveID) { return true; } +MipsAsmParser::OperandMatchResultTy MipsAsmParser:: + parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&) { + return MatchOperand_ParseFail; +} + extern "C" void LLVMInitializeMipsAsmParser() { RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget); RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget); diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index e9a228c..f535c50 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -10,13 +10,18 @@ tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM MipsGenEDInfo.inc 
-gen-enhanced-disassembly-info) +tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher) add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen + Mips16FrameLowering.cpp + Mips16InstrInfo.cpp + Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp MipsCodeEmitter.cpp MipsDelaySlotFiller.cpp + MipsELFWriterInfo.cpp MipsJITInfo.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp @@ -26,6 +31,9 @@ add_llvm_target(MipsCodeGen MipsMCInstLower.cpp MipsMachineFunction.cpp MipsRegisterInfo.cpp + MipsSEFrameLowering.cpp + MipsSEInstrInfo.cpp + MipsSERegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp MipsTargetObjectFile.cpp diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 042b456..aa57472 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -16,6 +16,7 @@ #include "MipsRegisterInfo.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -274,7 +275,8 @@ MipsDisassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; // Calling the auto-generated decoder function. - Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; @@ -298,13 +300,15 @@ Mips64Disassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; // Calling the auto-generated decoder function. - Result = decodeMips64Instruction32(instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; } // If we fail to decode in Mips64 decoder space we can try in Mips32 - Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; @@ -379,8 +383,8 @@ static DecodeStatus DecodeMem(MCInst &Inst, uint64_t Address, const void *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction32(Insn, 16, 5); - unsigned Base = fieldFromInstruction32(Insn, 21, 5); + unsigned Reg = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg); Base = getReg(Decoder, Mips::CPURegsRegClassID, Base); @@ -401,8 +405,8 @@ static DecodeStatus DecodeFMem(MCInst &Inst, uint64_t Address, const void *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction32(Insn, 16, 5); - unsigned Base = fieldFromInstruction32(Insn, 21, 5); + unsigned Reg = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg); Base = getReg(Decoder, Mips::CPURegsRegClassID, Base); @@ -484,7 +488,7 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst, uint64_t Address, const void *Decoder) { - unsigned JumpOffset = fieldFromInstruction32(Insn, 0, 26) << 2; + unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2; Inst.addOperand(MCOperand::CreateImm(JumpOffset)); return MCDisassembler::Success; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp 
b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 6fe0c11..18961fd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -35,6 +35,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { return 0; case FK_GPRel_4: case FK_Data_4: + case FK_Data_8: case Mips::fixup_Mips_LO16: case Mips::fixup_Mips_GPOFF_HI: case Mips::fixup_Mips_GPOFF_LO: @@ -59,9 +60,17 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { break; case Mips::fixup_Mips_HI16: case Mips::fixup_Mips_GOT_Local: - // Get the higher 16-bits. Also add 1 if bit 15 is 1. + // Get the 2nd 16-bits. Also add 1 if bit 15 is 1. Value = ((Value + 0x8000) >> 16) & 0xffff; break; + case Mips::fixup_Mips_HIGHER: + // Get the 3rd 16-bits. + Value = ((Value + 0x80008000LL) >> 32) & 0xffff; + break; + case Mips::fixup_Mips_HIGHEST: + // Get the 4th 16-bits. + Value = ((Value + 0x800080008000LL) >> 48) & 0xffff; + break; } return Value; @@ -168,7 +177,9 @@ public: { "fixup_Mips_GPOFF_LO", 0, 16, 0 }, { "fixup_Mips_GOT_PAGE", 0, 16, 0 }, { "fixup_Mips_GOT_OFST", 0, 16, 0 }, - { "fixup_Mips_GOT_DISP", 0, 16, 0 } + { "fixup_Mips_GOT_DISP", 0, 16, 0 }, + { "fixup_Mips_HIGHER", 0, 16, 0 }, + { "fixup_Mips_HIGHEST", 0, 16, 0 } }; if (Kind < FirstTargetFixupKind) diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 77c1524..b8489ca 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -34,7 +34,8 @@ namespace { class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: - MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64); + MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, + bool _isN64, bool IsLittleEndian); virtual ~MipsELFObjectWriter(); @@ -53,7 +54,7 @@ namespace { } MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, - bool _isN64) + bool _isN64, bool IsLittleEndian) : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, /*HasRelocationAddend*/ false, /*IsN64*/ _isN64) {} @@ -103,6 +104,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case FK_Data_4: Type = ELF::R_MIPS_32; break; + case FK_Data_8: + Type = ELF::R_MIPS_64; + break; case FK_GPRel_4: Type = ELF::R_MIPS_GPREL32; break; @@ -169,6 +173,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type); break; + case Mips::fixup_Mips_HIGHER: + Type = ELF::R_MIPS_HIGHER; + break; + case Mips::fixup_Mips_HIGHEST: + Type = ELF::R_MIPS_HIGHEST; + break; } return Type; } @@ -265,6 +275,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, bool IsLittleEndian, bool Is64Bit) { MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI, - (Is64Bit) ? true : false); + (Is64Bit) ? 
true : false, + IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index f5cbbd5..77faec5 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -110,6 +110,12 @@ namespace Mips { // resulting in - R_MIPS_GOT_DISP fixup_Mips_GOT_DISP, + // resulting in - R_MIPS_GOT_HIGHER + fixup_Mips_HIGHER, + + // resulting in - R_MIPS_HIGHEST + fixup_Mips_HIGHEST, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index ff3b3a7..8dab62d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -255,6 +255,12 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, case MCSymbolRefExpr::VK_Mips_TPREL_LO: FixupKind = Mips::fixup_Mips_TPREL_LO; break; + case MCSymbolRefExpr::VK_Mips_HIGHER: + FixupKind = Mips::fixup_Mips_HIGHER; + break; + case MCSymbolRefExpr::VK_Mips_HIGHEST: + FixupKind = Mips::fixup_Mips_HIGHEST; + break; } // switch Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind))); diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 596f071..93de517 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -16,7 +16,9 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ - MipsGenEDInfo.inc MipsGenDisassemblerTables.inc + MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \ + MipsGenAsmMatcher.inc + DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 8548ae0..7cec531 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -44,6 +44,8 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64", "Enable n64 ABI">; def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", "Enable eabi ABI">; +def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true", + "Target is android">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", "true", "Enable vector FPU instructions.">; def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true", @@ -93,9 +95,20 @@ def MipsAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def MipsAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +def MipsAsmParserVariant : AsmParserVariant { + int Variant = 0; + + // Recognize hard coded registers. + string RegisterPrefix = "$"; +} + def Mips : Target { let InstructionSet = MipsInstrInfo; - + let AssemblyParsers = [MipsAsmParser]; let AssemblyWriters = [MipsAsmWriter]; + let AssemblyParserVariants = [MipsAsmParserVariant]; } - diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp new file mode 100644 index 0000000..030042f --- /dev/null +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -0,0 +1,87 @@ +//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
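// --- Editorial sketch (not part of the patch): the MipsAsmBackend hunks
// above slice a 64-bit value into four 16-bit pieces; each rounding constant
// pre-propagates the carries generated when the sign-extended lower pieces
// are re-added at load time. The arithmetic, standalone:
#include <cstdint>

static uint16_t mipsHi16(uint64_t V)    { return (uint16_t)((V + 0x8000) >> 16); }
static uint16_t mipsHigher(uint64_t V)  { return (uint16_t)((V + 0x80008000ULL) >> 32); }
static uint16_t mipsHighest(uint64_t V) { return (uint16_t)((V + 0x800080008000ULL) >> 48); }
// --- end sketch ---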
+// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16FrameLowering.h" +#include "MipsInstrInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsInstrInfo &TII = + *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + uint64_t StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack()) return; + + // Adjust stack. + if (isInt<16>(-StackSize)) + BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize); +} + +void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsInstrInfo &TII = + *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + uint64_t StackSize = MFI->getStackSize(); + + if (!StackSize) + return; + + // Adjust stack. + if (isInt<16>(StackSize)) + // assumes stacksize multiple of 8 + BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize); +} + +bool Mips16FrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + // FIXME: implement. + return true; +} + +bool +Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // FIXME: implement. + return true; +} + +void Mips16FrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { +} + +const MipsFrameLowering * +llvm::createMips16FrameLowering(const MipsSubtarget &ST) { + return new Mips16FrameLowering(ST); +} diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h new file mode 100644 index 0000000..25cc37b --- /dev/null +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -0,0 +1,43 @@ +//===-- Mips16FrameLowering.h - Mips16 frame lowering ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
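// --- Editorial sketch (not part of the patch): Mips16FrameLowering's
// emitPrologue above only emits a Save instruction when the negated frame
// size fits a signed 16-bit immediate. llvm::isInt<N> amounts to this range
// check (isIntNSketch is an invented name):
#include <cstdint>

template <unsigned N>
static bool isIntNSketch(int64_t X) {
  return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
}
// e.g. isIntNSketch<16>(-StackSize) guards the SaveRaF16 immediate.
// --- end sketch ---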
+// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16_FRAMEINFO_H +#define MIPS16_FRAMEINFO_H + +#include "MipsFrameLowering.h" + +namespace llvm { +class Mips16FrameLowering : public MipsFrameLowering { +public: + explicit Mips16FrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI) {} + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp new file mode 100644 index 0000000..2bc286b --- /dev/null +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -0,0 +1,132 @@ +//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16InstrInfo.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm) + : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0), + RI(*tm.getSubtargetImpl(), *this) {} + +const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { + return RI; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned Mips16InstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. 
+unsigned Mips16InstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + return 0; +} + +void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0, ZeroReg = 0; + + if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. + if (Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::Mov32R16; + } + + assert(Opc && "Cannot copy registers"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); + + if (DestReg) + MIB.addReg(DestReg, RegState::Define); + + if (ZeroReg) + MIB.addReg(ZeroReg); + + if (SrcReg) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); +} + +void Mips16InstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + assert(false && "Implement this function."); +} + +void Mips16InstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + assert(false && "Implement this function."); +} + +bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineBasicBlock &MBB = *MI->getParent(); + + switch(MI->getDesc().getOpcode()) { + default: + return false; + case Mips::RetRA16: + ExpandRetRA16(MBB, MI, Mips::JrRa16); + break; + } + + MBB.erase(MI); + return true; +} + +/// GetOppositeBranchOpc - Return the inverse of the specified +/// opcode, e.g. turning BEQ to BNE. +unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { + assert(false && "Implement this function."); + return 0; +} + +unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { + return 0; +} + +void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Opc) const { + BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); +} + +const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) { + return new Mips16InstrInfo(TM); +} diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h new file mode 100644 index 0000000..260c5b6 --- /dev/null +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -0,0 +1,76 @@ +//===-- Mips16InstrInfo.h - Mips16 Instruction Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16INSTRUCTIONINFO_H +#define MIPS16INSTRUCTIONINFO_H + +#include "MipsInstrInfo.h" +#include "Mips16RegisterInfo.h" + +namespace llvm { + +class Mips16InstrInfo : public MipsInstrInfo { + const Mips16RegisterInfo RI; + +public: + explicit Mips16InstrInfo(MipsTargetMachine &TM); + + virtual const MipsRegisterInfo &getRegisterInfo() const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. 
This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + + void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned Opc) const; +}; + +} + +#endif diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index c852042..94cf984 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -11,10 +11,6 @@ // //===----------------------------------------------------------------------===// -def uimm5 : Operand<i8> { - let DecoderMethod= "DecodeSimm16"; -} - // // RRR-type instruction format // @@ -46,9 +42,32 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr, class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>: FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>; + +class FEXT_2RI16_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm"), [], itin> { + let Constraints = "$rx_ = $rx"; +} + + // // RR-type instruction format // + +class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry), + !strconcat(asmstr, "\t$rx, $ry"), [], itin> { +} + +class FRxRxRy16_ins<bits<5> f, string asmstr, + InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$rz, $ry"), + [], itin> { + let Constraints = "$rx = $rz"; +} + let rx=0 in class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_, string asmstr, InstrItinClass itin>: @@ -64,11 +83,16 @@ class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd, FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr), !strconcat(asmstr, "\t$ry, $addr"), [], itin>; +class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd, + InstrItinClass itin>: + FEXT_RRI16<op, (outs ), (ins CPU16Regs:$ry, MemOpnd:$addr), + !strconcat(asmstr, "\t$ry, $addr"), [], itin>; + // // EXT-SHIFT instruction format // class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>: - FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, uimm5:$sa), + 
FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa), !strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>; // @@ -80,20 +104,49 @@ def mem16 : Operand<i32> { } // +// Some general instruction class info +// +// + +class ArithLogic16Defs<bit isCom=0> { + bits<5> shamt = 0; + bit isCommutable = isCom; + bit isReMaterializable = 1; + bit neverHasSideEffects = 1; +} + +// + +// Format: ADDIU rx, immediate MIPS16e +// Purpose: Add Immediate Unsigned Word (2-Operand, Extended) +// To add a constant to a 32-bit integer. +// +def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>; + +def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>, + ArithLogic16Defs<0>; + +// + // Format: ADDIU rx, pc, immediate MIPS16e // Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended) // To add a constant to the program counter. // -class AddiuRxPcImmX16_base : FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; -def AddiuRxPcImmX16 : AddiuRxPcImmX16_base; +def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; // // Format: ADDU rz, rx, ry MIPS16e // Purpose: Add Unsigned Word (3-Operand) // To add 32-bit integers. // -class AdduRxRyRz16_base: FRRR16_ins<01, "addu", IIAlu>; -def AdduRxRyRz16: AdduRxRyRz16_base; +def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>; + +// +// Format: AND rx, ry MIPS16e +// Purpose: AND +// To do a bitwise logical AND. + +def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; // // Format: JR ra MIPS16e @@ -105,6 +158,34 @@ def AdduRxRyRz16: AdduRxRyRz16_base; def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>; // +// Format: LB ry, offset(rx) MIPS16e +// Purpose: Load Byte (Extended) +// To load a byte from memory as a signed value. +// +def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>; + +// +// Format: LBU ry, offset(rx) MIPS16e +// Purpose: Load Byte Unsigned (Extended) +// To load a byte from memory as a unsigned value. +// +def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>; + +// +// Format: LH ry, offset(rx) MIPS16e +// Purpose: Load Halfword signed (Extended) +// To load a halfword from memory as a signed value. +// +def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>; + +// +// Format: LHU ry, offset(rx) MIPS16e +// Purpose: Load Halfword unsigned (Extended) +// To load a halfword from memory as an unsigned value. +// +def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>; + +// // Format: LI rx, immediate MIPS16e // Purpose: Load Immediate (Extended) // To load a constant into a GPR. @@ -116,8 +197,7 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; // Purpose: Load Word (Extended) // To load a word from memory as a signed value. // -class LwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>; -def LwRxRyOffMemX16: LwRxRyOffMemX16_base; +def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>; // // Format: MOVE r32, rz MIPS16e @@ -125,6 +205,28 @@ def LwRxRyOffMemX16: LwRxRyOffMemX16_base; // To move the contents of a GPR to a GPR. // def Mov32R16: FI8_MOV32R16_ins<"move", IIAlu>; + +// +// Format: NEG rx, ry MIPS16e +// Purpose: Negate +// To negate an integer value. +// +def NegRxRy16: FRR16_ins<0b11101, "neg", IIAlu>; + +// +// Format: NOT rx, ry MIPS16e +// Purpose: Not +// To complement an integer value +// +def NotRxRy16: FRR16_ins<0b01111, "not", IIAlu>; + +// +// Format: OR rx, ry MIPS16e +// Purpose: Or +// To do a bitwise logical OR. 
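The two-address formats above (FEXT_2RI16_ins and FRxRxRy16_ins) model the destructive MIPS16 encodings by tying an input register to the output through Constraints. When such an instruction is built by hand rather than selected from a pattern, the tied source must be supplied explicitly; a sketch under that assumption, using the AddiuRxRxImmX16 definition above:

// Emit "addiu $rx, imm": the destination register also appears as the
// tied input operand ($rx_ = $rx).
static void emitAddiu16(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator I, DebugLoc DL,
                        const TargetInstrInfo &TII, unsigned Reg,
                        int64_t Imm) {
  BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxRxImmX16), Reg)
      .addReg(Reg)    // tied source $rx_
      .addImm(Imm);
}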
+// +def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>; + // // Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize} // (All args are optional) MIPS16e @@ -156,6 +258,20 @@ def SaveRaF16: "save \t$$ra, $frame_size", [], IILoad >; // +// Format: SB ry, offset(rx) MIPS16e +// Purpose: Store Byte (Extended) +// To store a byte to memory. +// +def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>; + +// +// Format: SH ry, offset(rx) MIPS16e +// Purpose: Store Halfword (Extended) +// To store a halfword to memory. +// +def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>; + +// // Format: SLL rx, ry, sa MIPS16e // Purpose: Shift Word Left Logical (Extended) // To execute a left-shift of a word by a fixed number of bits—0 to 31 bits. @@ -163,57 +279,127 @@ def SaveRaF16: def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>; // +// Format: SLLV ry, rx MIPS16e +// Purpose: Shift Word Left Logical Variable +// To execute a left-shift of a word by a variable number of bits. +// +def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>; + + +// +// Format: SRAV ry, rx MIPS16e +// Purpose: Shift Word Right Arithmetic Variable +// To execute an arithmetic right-shift of a word by a variable +// number of bits. +// +def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>; + + +// +// Format: SRA rx, ry, sa MIPS16e +// Purpose: Shift Word Right Arithmetic (Extended) +// To execute an arithmetic right-shift of a word by a fixed +// number of bits—1 to 8 bits. +// +def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>; + + +// +// Format: SRLV ry, rx MIPS16e +// Purpose: Shift Word Right Logical Variable +// To execute a logical right-shift of a word by a variable +// number of bits. +// +def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>; + + +// +// Format: SRL rx, ry, sa MIPS16e +// Purpose: Shift Word Right Logical (Extended) +// To execute a logical right-shift of a word by a fixed +// number of bits—1 to 31 bits. +// +def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>; + +// +// Format: SUBU rz, rx, ry MIPS16e +// Purpose: Subtract Unsigned Word +// To subtract 32-bit integers +// +def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>; + +// // Format: SW ry, offset(rx) MIPS16e // Purpose: Store Word (Extended) // To store a word to memory. // -class SwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b11011, "sw", mem16, IIAlu>; -def SwRxRyOffMemX16: SwRxRyOffMemX16_base; +def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>; + +// +// Format: XOR rx, ry MIPS16e +// Purpose: Xor +// To do a bitwise logical XOR. 
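The SAVE/RESTORE forms defined above are what the Mips16 prologue and epilogue emit in place of an explicit addiu-$sp plus sw-$ra sequence; the usage, taken from the frame-lowering code elsewhere in this patch (valid when the frame size fits the immediate):

// Prologue: "save $ra, StackSize" drops $sp and spills $ra in one shot.
BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize);
// Epilogue counterpart; StackSize is assumed to be a multiple of 8.
BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize);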
+// +def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>; class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> { let Predicates = [InMips16Mode]; } -class ArithLogicR16Defs<SDNode OpNode, bit isComm = 0> { - dag OutOperandList = (outs CPU16Regs:$rz); - dag InOperandList = (ins CPU16Regs:$rx, CPU16Regs:$ry); - list<dag> Pattern = [(set CPU16Regs:$rz, - (OpNode CPU16Regs:$rx, CPU16Regs:$ry))]; -} +// Unary Arith/Logic +// +class ArithLogicU_pat<PatFrag OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$r), + (I CPU16Regs:$r)>; -multiclass ArithLogicR16_base { - def _add: AdduRxRyRz16_base, ArithLogicR16Defs<add, 1>; -} +def: ArithLogicU_pat<not, NotRxRy16>; +def: ArithLogicU_pat<ineg, NegRxRy16>; -defm ArithLogicR16_patt : ArithLogicR16_base; +class ArithLogic16_pat<SDNode OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$l, CPU16Regs:$r), + (I CPU16Regs:$l, CPU16Regs:$r)>; -class LoadM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> { - bit isPseudo = Pseudo; - Operand MemOpnd = _MemOpnd; - dag OutOperandList = (outs CPU16Regs:$ry); - dag InOperandList = (ins MemOpnd:$addr); - list<dag> Pattern = [(set CPU16Regs:$ry, (OpNode addr:$addr))]; -} +def: ArithLogic16_pat<add, AdduRxRyRz16>; +def: ArithLogic16_pat<and, AndRxRxRy16>; +def: ArithLogic16_pat<or, OrRxRxRy16>; +def: ArithLogic16_pat<sub, SubuRxRyRz16>; +def: ArithLogic16_pat<xor, XorRxRxRy16>; -multiclass LoadM16_base { - def _LwRxRyOffMemX16: LwRxRyOffMemX16_base, LoadM16Defs<load_a, mem16>; -} +// Arithmetic and logical instructions with 2 register operands. -defm LoadM16: LoadM16_base; +class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm), + (I CPU16Regs:$in, imm_type:$imm)>; -class StoreM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> { - bit isPseudo = Pseudo; - Operand MemOpnd = _MemOpnd; - dag OutOperandList = (outs ); - dag InOperandList = (ins CPU16Regs:$ry, MemOpnd:$addr); - list<dag> Pattern = [(OpNode CPU16Regs:$ry, addr:$addr)]; -} +def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>; +def: ArithLogicI16_pat<shl, immZExt5, SllX16>; +def: ArithLogicI16_pat<srl, immZExt5, SrlX16>; +def: ArithLogicI16_pat<sra, immZExt5, SraX16>; -multiclass StoreM16_base { - def _SwRxRyOffMemX16: SwRxRyOffMemX16_base, StoreM16Defs<store_a, mem16>; -} +class shift_rotate_reg16_pat<SDNode OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$r, CPU16Regs:$ra), + (I CPU16Regs:$r, CPU16Regs:$ra)>; + +def: shift_rotate_reg16_pat<shl, SllvRxRy16>; +def: shift_rotate_reg16_pat<sra, SravRxRy16>; +def: shift_rotate_reg16_pat<srl, SrlvRxRy16>; + +class LoadM16_pat<PatFrag OpNode, Instruction I> : + Mips16Pat<(OpNode addr:$addr), (I addr:$addr)>; + +def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>; +def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>; +def: LoadM16_pat<sextloadi16_a, LhRxRyOffMemX16>; +def: LoadM16_pat<zextloadi16_a, LhuRxRyOffMemX16>; +def: LoadM16_pat<load_a, LwRxRyOffMemX16>; + +class StoreM16_pat<PatFrag OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$r, addr:$addr), (I CPU16Regs:$r, addr:$addr)>; + +def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>; +def: StoreM16_pat<truncstorei16_a, ShRxRyOffMemX16>; +def: StoreM16_pat<store_a, SwRxRyOffMemX16>; -defm StoreM16: StoreM16_base; // Jump and Link (Call) let isCall=1, hasDelaySlot=1 in @@ -226,18 +412,8 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1, hasExtraSrcRegAllocReq = 1 in def RetRA16 : MipsPseudo16<(outs), (ins), 
"", [(MipsRet)]>; -// As stack alignment is always done with addiu, we need a 16-bit immediate -// This is basically deprecated code but needs to be there for things -// to work. -let Defs = [SP], Uses = [SP] in { -def ADJCALLSTACKDOWN16 : MipsPseudo16<(outs), (ins uimm16:$amt), - ";", - [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2), - ";", - [(callseq_end timm:$amt1, timm:$amt2)]>; -} - // Small immediates -def : Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; -def : Mips16Pat<(MipsLo tglobaladdr:$in), (LiRxImmX16 tglobaladdr:$in)>; +def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; + +def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), + (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>; diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp new file mode 100644 index 0000000..c15d1bf --- /dev/null +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -0,0 +1,111 @@ +//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MIPS16 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16RegisterInfo.h" +#include "Mips.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsInstrInfo.h" +#include "MipsSubtarget.h" +#include "MipsMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST, + const TargetInstrInfo &TII) + : MipsRegisterInfo(ST, TII) {} + +// This function eliminate ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void Mips16RegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + +void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, + unsigned OpNo, int FrameIndex, + uint64_t StackSize, + int64_t SPOffset) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always + // referenced relative to $sp: + // 1. Outgoing arguments. + // 2. 
Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (MipsFI->isOutArgFI(FrameIndex) || + (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + else + FrameReg = getFrameRegister(MF); + + // Calculate final offset. + // - There is no need to change the offset if the frame object + // is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, + // its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. + int64_t Offset; + + if (MipsFI->isOutArgFI(FrameIndex)) + Offset = SPOffset; + else + Offset = SPOffset + (int64_t)StackSize; + + Offset += MI.getOperand(OpNo + 1).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); + + +} diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h new file mode 100644 index 0000000..3f4b3a7 --- /dev/null +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -0,0 +1,37 @@ +//===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of the TargetRegisterInfo class. 
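The offset arithmetic in eliminateFI above reduces to a single rule: only outgoing-argument objects keep their raw SP-relative offset; every other object (incoming arguments, callee-saved slots, locals) is rebased by the final stack size, independently of which base register was chosen. Distilled into a standalone helper (a restatement for clarity, not code from the patch):

static int64_t finalFIOffset(bool IsOutArg, int64_t SPOffset,
                             uint64_t StackSize, int64_t OperandImm) {
  int64_t Offset = IsOutArg ? SPOffset                       // raw $sp offset
                            : SPOffset + (int64_t)StackSize; // rebase past frame
  return Offset + OperandImm; // fold the instruction's existing immediate
}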
+// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16REGISTERINFO_H +#define MIPS16REGISTERINFO_H + +#include "MipsRegisterInfo.h" + +namespace llvm { + +class Mips16RegisterInfo : public MipsRegisterInfo { +public: + Mips16RegisterInfo(const MipsSubtarget &Subtarget, + const TargetInstrInfo &TII); + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index cceee24..20fc178 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -208,26 +208,25 @@ def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>; def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>; def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>; -def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; +def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; } let Uses = [SP_64], DecoderNamespace = "Mips64" in -def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, - Requires<[IsN64, HasStandardEncoding]> { - let isCodeGenOnly = 1; -} +def DynAlloc64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, + Requires<[IsN64, HasStandardEncoding]>; let DecoderNamespace = "Mips64" in { def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; def DEXT : ExtBase<3, "dext", CPU64Regs>; def DINS : InsBase<7, "dins", CPU64Regs>; -def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), - "dsll\t$rd, $rt, 32", [], IIAlu>; -def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), - "sll\t$rd, $rt, 0", [], IIAlu>; -let isCodeGenOnly = 1 in -def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), - "sll\t$rd, $rt, 0", [], IIAlu>; +let isCodeGenOnly = 1, rs = 0, shamt = 0 in { + def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "dsll\t$rd, $rt, 32", [], IIAlu>; + def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "sll\t$rd, $rt, 0", [], IIAlu>; + def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), + "sll\t$rd, $rt, 0", [], IIAlu>; +} } //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 8aadefd..19213fa 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -145,6 +145,17 @@ def RetCC_MipsEABI : CallingConv<[ ]>; //===----------------------------------------------------------------------===// +// Mips Android Calling Convention +//===----------------------------------------------------------------------===// + +def RetCC_MipsAndroid : CallingConv<[ + // f32 are returned in registers F0, F2, F1, F3 + CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>, + + CCDelegateTo<RetCC_MipsO32> +]>; + +//===----------------------------------------------------------------------===// // Mips FastCC Calling Convention //===----------------------------------------------------------------------===// def CC_MipsO32_FastCC : CallingConv<[ @@ -210,6 +221,7 @@ def RetCC_Mips : CallingConv<[ CCIfSubtarget<"isABI_EABI()", 
CCDelegateTo<RetCC_MipsEABI>>, CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>, CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>, + CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>, CCDelegateTo<RetCC_MipsO32> ]>; diff --git a/lib/Target/Mips/MipsELFWriterInfo.cpp b/lib/Target/Mips/MipsELFWriterInfo.cpp new file mode 100644 index 0000000..ac3a547 --- /dev/null +++ b/lib/Target/Mips/MipsELFWriterInfo.cpp @@ -0,0 +1,92 @@ +//===-- MipsELFWriterInfo.cpp - ELF Writer Info for the Mips backend ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the Mips backend. +// +//===----------------------------------------------------------------------===// + +#include "MipsELFWriterInfo.h" +#include "MipsRelocations.h" +#include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Implementation of the MipsELFWriterInfo class +//===----------------------------------------------------------------------===// + +MipsELFWriterInfo::MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_) + : TargetELFWriterInfo(is64Bit_, isLittleEndian_) { + EMachine = EM_MIPS; +} + +MipsELFWriterInfo::~MipsELFWriterInfo() {} + +unsigned MipsELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { + switch(MachineRelTy) { + case Mips::reloc_mips_pc16: + return ELF::R_MIPS_GOT16; + case Mips::reloc_mips_hi: + return ELF::R_MIPS_HI16; + case Mips::reloc_mips_lo: + return ELF::R_MIPS_LO16; + case Mips::reloc_mips_26: + return ELF::R_MIPS_26; + default: + llvm_unreachable("unknown Mips machine relocation type"); + } +} + +long int MipsELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier) const { + switch(RelTy) { + case ELF::R_MIPS_26: return Modifier; + default: + llvm_unreachable("unknown Mips relocation type"); + } +} + +unsigned MipsELFWriterInfo::getRelocationTySize(unsigned RelTy) const { + switch(RelTy) { + case ELF::R_MIPS_GOT16: + case ELF::R_MIPS_26: + return 32; + default: + llvm_unreachable("unknown Mips relocation type"); + } +} + +bool MipsELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { + switch(RelTy) { + case ELF::R_MIPS_GOT16: + return true; + case ELF::R_MIPS_26: + return false; + default: + llvm_unreachable("unknown Mips relocation type"); + } +} + +unsigned MipsELFWriterInfo::getAbsoluteLabelMachineRelTy() const { + return Mips::reloc_mips_26; +} + +long int MipsELFWriterInfo::computeRelocation(unsigned SymOffset, + unsigned RelOffset, + unsigned RelTy) const { + + if (RelTy == ELF::R_MIPS_GOT16) + return SymOffset - (RelOffset + 4); + + llvm_unreachable("computeRelocation unknown for this relocation type"); +} diff --git a/lib/Target/Mips/MipsELFWriterInfo.h b/lib/Target/Mips/MipsELFWriterInfo.h new file mode 100644 index 0000000..23f3f03 --- /dev/null +++ b/lib/Target/Mips/MipsELFWriterInfo.h @@ -0,0 +1,59 @@ +//===-- MipsELFWriterInfo.h - ELF Writer Info for Mips ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the Mips backend. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS_ELF_WRITER_INFO_H +#define MIPS_ELF_WRITER_INFO_H + +#include "llvm/Target/TargetELFWriterInfo.h" + +namespace llvm { + + class MipsELFWriterInfo : public TargetELFWriterInfo { + + public: + MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_); + virtual ~MipsELFWriterInfo(); + + /// getRelocationType - Returns the target specific ELF Relocation type. + /// 'MachineRelTy' contains the object code independent relocation type + virtual unsigned getRelocationType(unsigned MachineRelTy) const; + + /// hasRelocationAddend - True if the target uses an addend in the + /// ELF relocation entry. + virtual bool hasRelocationAddend() const { return is64Bit ? true : false; } + + /// getDefaultAddendForRelTy - Gets the default addend value for a + /// relocation entry based on the target ELF relocation type. + virtual long int getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier = 0) const; + + /// getRelTySize - Returns the size of relocatable field in bits + virtual unsigned getRelocationTySize(unsigned RelTy) const; + + /// isPCRelativeRel - True if the relocation type is pc relative + virtual bool isPCRelativeRel(unsigned RelTy) const; + + /// getJumpTableRelocationTy - Returns the machine relocation type used + /// to reference a jumptable. + virtual unsigned getAbsoluteLabelMachineRelTy() const; + + /// computeRelocation - Some relocatable fields could be relocated + /// directly, avoiding the relocation symbol emission, compute the + /// final relocation value for this symbol. + virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, + unsigned RelTy) const; + }; + +} // end llvm namespace + +#endif // MIPS_ELF_WRITER_INFO_H diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 6338f3c..8c0474b 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -15,6 +15,7 @@ #include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" +#include "MipsTargetMachine.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -81,6 +82,14 @@ using namespace llvm; // //===----------------------------------------------------------------------===// +const MipsFrameLowering *MipsFrameLowering::create(MipsTargetMachine &TM, + const MipsSubtarget &ST) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16FrameLowering(ST); + + return llvm::createMipsSEFrameLowering(ST); +} + // hasFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. 
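The new MipsFrameLowering::create() hook above turns the class into a dispatcher over the two subtarget flavors split out by this patch. The caller is presumably the target machine constructor (that hunk is not part of this file); usage would look like:

// Assumption: wired up when the MipsTargetMachine is constructed.
const MipsFrameLowering *FL =
    MipsFrameLowering::create(TM, *TM.getSubtargetImpl());
// FL is a Mips16FrameLowering in mips16 mode, a MipsSEFrameLowering otherwise.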
@@ -89,218 +98,3 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); } - -bool MipsFrameLowering::targetHandlesStackFrameRounding() const { - return true; -} - -void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - const MipsRegisterInfo *RegInfo = - static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); - const MipsInstrInfo &TII = - *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); - MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - - // First, compute final stack size. - unsigned StackAlign = getStackAlignment(); - uint64_t StackSize = RoundUpToAlignment(MFI->getStackSize(), StackAlign); - - if (MipsFI->globalBaseRegSet()) - StackSize += MFI->getObjectOffset(MipsFI->getGlobalRegFI()) + StackAlign; - else - StackSize += RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign); - - // Update stack size - MFI->setStackSize(StackSize); - - // No need to allocate space on the stack. - if (StackSize == 0 && !MFI->adjustsStack()) return; - - MachineModuleInfo &MMI = MF.getMMI(); - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - MachineLocation DstML, SrcML; - - // Adjust stack. - if (isInt<16>(-StackSize)) {// addi sp, sp, (-stacksize) - if (STI.inMips16Mode()) - BuildMI(MBB, MBBI, dl, - TII.get(Mips::SaveRaF16)).addImm(StackSize); // cleanup - else - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize); - } - else { // Expand immediate that doesn't fit in 16-bit. - unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; - - MF.getInfo<MipsFunctionInfo>()->setEmitNOAT(); - Mips::loadImmediate(-StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false, - 0); - BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg); - } - - // emit ".cfi_def_cfa_offset StackSize" - MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); - DstML = MachineLocation(MachineLocation::VirtualFP); - SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); - Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); - - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - - if (CSI.size()) { - // Find the instruction past the last instruction that saves a callee-saved - // register to the stack. - for (unsigned i = 0; i < CSI.size(); ++i) - ++MBBI; - - // Iterate over list of callee-saved registers and emit .cfi_offset - // directives. 
- MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); - - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); - unsigned Reg = I->getReg(); - - // If Reg is a double precision register, emit two cfa_offsets, - // one for each of the paired single precision registers. - if (Mips::AFGR64RegClass.contains(Reg)) { - MachineLocation DstML0(MachineLocation::VirtualFP, Offset); - MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); - MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven)); - MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd)); - - if (!STI.isLittle()) - std::swap(SrcML0, SrcML1); - - Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); - Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); - } else { - // Reg is either in CPURegs or FGR32. - DstML = MachineLocation(MachineLocation::VirtualFP, Offset); - SrcML = MachineLocation(Reg); - Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); - } - } - } - - // if framepointer enabled, set it to point to the stack pointer. - if (hasFP(MF)) { - // Insert instruction "move $fp, $sp" at this location. - BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); - - // emit ".cfi_def_cfa_register $fp" - MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); - DstML = MachineLocation(FP); - SrcML = MachineLocation(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); - } -} - -void MipsFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const MipsInstrInfo &TII = - *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); - DebugLoc dl = MBBI->getDebugLoc(); - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - - // if framepointer enabled, restore the stack pointer. - if (hasFP(MF)) { - // Find the first instruction that restores a callee-saved register. - MachineBasicBlock::iterator I = MBBI; - - for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) - --I; - - // Insert instruction "move $sp, $fp" at this location. - BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); - } - - // Get the number of bytes from FrameInfo - uint64_t StackSize = MFI->getStackSize(); - - if (!StackSize) - return; - - // Adjust stack. - if (isInt<16>(StackSize)) { // addi sp, sp, (-stacksize) - if (STI.inMips16Mode()) - // assumes stacksize multiple of 8 - BuildMI(MBB, MBBI, dl, - TII.get(Mips::RestoreRaF16)).addImm(StackSize); - else - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize); - } - else { // Expand immediate that doesn't fit in 16-bit. - unsigned ATReg = STI.isABI_N64() ? 
Mips::AT_64 : Mips::AT; - - MF.getInfo<MipsFunctionInfo>()->setEmitNOAT(); - Mips::loadImmediate(StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false, - 0); - BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg); - } -} - -void MipsFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - - // FIXME: remove this code if register allocator can correctly mark - // $fp and $ra used or unused. - - // Mark $fp and $ra as used or unused. - if (hasFP(MF)) - MRI.setPhysRegUsed(FP); -} - -bool MipsFrameLowering:: -spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction *MF = MBB.getParent(); - MachineBasicBlock *EntryBlock = MF->begin(); - const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); - - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. Do not add if the register is - // RA and return address is taken, because it has already been added in - // method MipsTargetLowering::LowerRETURNADDR. - // It's killed at the spill, unless the register is RA and return address - // is taken. - unsigned Reg = CSI[i].getReg(); - bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64) - && MF->getFrameInfo()->isReturnAddressTaken(); - if (!IsRAAndRetAddrIsTaken) - EntryBlock->addLiveIn(Reg); - - // Insert the spill to the stack frame. - bool IsKill = !IsRAAndRetAddrIsTaken; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill, - CSI[i].getFrameIdx(), RC, TRI); - } - - return true; -} diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index e364ded..ed7b7fe 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -27,28 +27,19 @@ protected: public: explicit MipsFrameLowering(const MipsSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0), - STI(sti) { - } + : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0, + sti.hasMips64() ? 16 : 8), STI(sti) {} - bool targetHandlesStackFrameRounding() const; - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + static const MipsFrameLowering *create(MipsTargetMachine &TM, + const MipsSubtarget &ST); bool hasFP(const MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; }; +/// Create MipsInstrInfo objects. 
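The two factory functions declared next return the concrete frame-lowering objects. Their definitions are not among these hunks, but by analogy with createMips16InstrInfo earlier in the patch they presumably look like:

// Assumed shape; the actual definition would live in Mips16FrameLowering.cpp.
const MipsFrameLowering *
llvm::createMips16FrameLowering(const MipsSubtarget &ST) {
  return new Mips16FrameLowering(ST);
}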
+const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST); +const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST); + } // End llvm namespace #endif diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index ea33b74..5a97c17 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -117,28 +117,23 @@ private: void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - if (((MF.getTarget().getRelocationModel() == Reloc::Static) || - Subtarget.inMips16Mode()) && !MipsFI->globalBaseRegSet()) + if (!MipsFI->globalBaseRegSet()) return; MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - const MipsRegisterInfo *TargetRegInfo = TM.getRegisterInfo(); - const MipsInstrInfo *MII = TM.getInstrInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); - int FI = 0; + const TargetRegisterClass *RC; - FI= MipsFI->initGlobalRegFI(); - - const TargetRegisterClass *RC = Subtarget.isABI_N64() ? - (const TargetRegisterClass*)&Mips::CPU64RegsRegClass : - (const TargetRegisterClass*)&Mips::CPURegsRegClass; - - if (Subtarget.inMips16Mode()) - RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + if (Subtarget.isABI_N64()) + RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; + else if (Subtarget.inMips16Mode()) + RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + else + RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; V0 = RegInfo.createVirtualRegister(RC); V1 = RegInfo.createVirtualRegister(RC); @@ -158,23 +153,17 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { .addReg(Mips::T9_64); BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, - TargetRegInfo); return; } if (Subtarget.inMips16Mode()) { BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), - V1) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); - BuildMI(MBB, I, DL, TII.get(Mips::SllX16), - V2 ).addReg(V0).addImm(16); + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) .addReg(V1).addReg(V2); - - return; } @@ -203,19 +192,11 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, - TargetRegInfo); return; } assert(Subtarget.isABI_O32()); - - //if (Subtarget.inMips16Mode()) - // return; // no need to load GP. 
It can be calculated anywhere - - - // For O32 ABI, the following instruction sequence is emitted to initialize // the global base register: // @@ -237,7 +218,6 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { MBB.addLiveIn(Mips::V0); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) .addReg(Mips::V0).addReg(Mips::T9); - MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, TargetRegInfo); } bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, @@ -262,13 +242,14 @@ bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, // Replace uses with ZeroReg. for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), - E = MRI->use_end(); U != E; ++U) { + E = MRI->use_end(); U != E;) { MachineOperand &MO = U.getOperand(); + unsigned OpNo = U.getOperandNo(); MachineInstr *MI = MO.getParent(); + ++U; // Do not replace if it is a phi's operand or is tied to def operand. - if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()) || - MI->isPseudo()) + if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) continue; MO.setReg(ZeroReg); @@ -309,21 +290,6 @@ bool MipsDAGToDAGISel:: SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { EVT ValTy = Addr.getValueType(); - // If Parent is an unaligned f32 load or store, select a (base + index) - // floating point load/store instruction (luxc1 or suxc1). - const LSBaseSDNode *LS = 0; - - if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) { - EVT VT = LS->getMemoryVT(); - - if (VT.getSizeInBits() / 8 > LS->getAlignment()) { - assert(TLI.allowsUnalignedMemoryAccesses(VT) && - "Unaligned loads/stores not supported for this type."); - if (VT == MVT::f32) - return false; - } - } - // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); @@ -382,6 +348,8 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { } // If an indexed floating point load/store can be emitted, return false. 
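One detail of the ReplaceUsesWithZeroReg hunk above deserves a note: the loop now advances the use_iterator before rewriting the operand, because MachineOperand::setReg() relinks the operand onto ZeroReg's use list and would invalidate an iterator still pointing at it. The pattern in isolation:

// Capture what is needed, step the iterator, then mutate.
for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
                                       E = MRI->use_end(); U != E;) {
  MachineOperand &MO = U.getOperand();
  ++U;               // must happen before setReg() unlinks MO from this list
  MO.setReg(ZeroReg);
}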
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); + if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && Subtarget.hasMips32r2Or64()) diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 7741f9f..c5207c6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -157,7 +157,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); @@ -178,7 +177,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); setOperationAction(ISD::LOAD, MVT::i64, Custom); setOperationAction(ISD::STORE, MVT::i64, Custom); } @@ -217,6 +215,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); if (!Subtarget->hasMips32r2()) setOperationAction(ISD::ROTR, MVT::i32, Expand); @@ -314,8 +314,6 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { case MVT::i64: case MVT::i32: return true; - case MVT::f32: - return Subtarget->hasMips32r2Or64(); default: return false; } @@ -794,7 +792,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -1504,42 +1501,6 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // Misc Lower Operation implementation //===----------------------------------------------------------------------===// SDValue MipsTargetLowering:: -LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const -{ - MachineFunction &MF = DAG.getMachineFunction(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP; - - assert(getTargetMachine().getFrameLowering()->getStackAlignment() >= - cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() && - "Cannot lower if the alignment of the allocated space is larger than \ - that of the stack."); - - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - // Get a reference from Mips stack pointer - SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy()); - - // Subtract the dynamic size from the actual stack size to - // obtain the new stack size. - SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size); - - // The Sub result contains the new stack start address, so it - // must be placed in the stack pointer register. 
- Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue()); - - // This node always has two return values: a new stack pointer - // value and a chain - SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other); - SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy()); - SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) }; - - return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3); -} - -SDValue MipsTargetLowering:: LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is @@ -2455,9 +2416,9 @@ static unsigned getNextIntArgReg(unsigned Reg) { // Write ByVal Arg to arg registers and stack. static void -WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, +WriteByValArg(SDValue Chain, DebugLoc dl, SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, int &LastFI, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, MVT PtrType, bool isLittle) { @@ -2531,24 +2492,24 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, return; } - // Create a fixed object on stack at offset LocMemOffset and copy - // remaining part of byval arg to it using memcpy. + // Copy remaining part of byval arg using memcpy. SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, DAG.getConstant(Offset, MVT::i32)); - LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(RemainingSize, MVT::i32), - std::min(ByValAlign, (unsigned)4), - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); + SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, + DAG.getIntPtrConstant(LocMemOffset)); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(RemainingSize, MVT::i32), + std::min(ByValAlign, (unsigned)4), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } // Copy Mips64 byVal arg to registers and stack. void static -PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, +PassByValArg64(SDValue Chain, DebugLoc dl, SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, int &LastFI, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, EVT PtrTy, bool isLittle) { @@ -2620,16 +2581,16 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, assert(MemCpySize && "MemCpySize must not be zero."); - // Create a fixed object on stack at offset LocMemOffset and copy - // remainder of byval arg to it with memcpy. + // Copy remainder of byval arg to it with memcpy. 
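Both byval helpers now receive the live StackPtr value and form the destination address directly, rather than creating a fixed frame object per call site; the memcpy's output chain joins MemOpChains so it is serialized with the rest of the outgoing-argument stores. The shape common to WriteByValArg and PassByValArg64:

// Destination = $sp + LocMemOffset; no CreateFixedObject, no frame index.
SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrTy, StackPtr,
                          DAG.getIntPtrConstant(LocMemOffset));
Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
                      DAG.getConstant(MemCpySize, PtrTy), Alignment,
                      /*isVolatile=*/false, /*AlwaysInline=*/false,
                      MachinePointerInfo(0), MachinePointerInfo(0));
MemOpChains.push_back(Chain);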
SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, DAG.getConstant(Offset, PtrTy)); - LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(MemCpySize, PtrTy), Alignment, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); + SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, + DAG.getIntPtrConstant(LocMemOffset)); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } /// LowerCall - functions arguments are copied from virtual regs to @@ -2643,9 +2604,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; SmallVector<SDValue, 32> &OutVals = CLI.OutVals; SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; - SDValue InChain = CLI.Chain; + SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; - SDValue CalleeSave = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; @@ -2675,18 +2635,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); - - // Chain is the output chain of the last Load/Store or CopyToReg node. - // ByValChain is the output chain of the last Memcpy node created for copying - // byval arguments to the stack. - SDValue Chain, CallSeqStart, ByValChain; - SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); - Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); - ByValChain = InChain; - - // Get the frame index of the stack frame object that points to the location - // of dynamically allocated area on the stack. - int DynAllocFI = MipsFI->getDynAllocFI(); + unsigned StackAlignment = TFL->getStackAlignment(); + NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment); // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is @@ -2694,27 +2644,23 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (IsO32 && (CallConv != CallingConv::Fast)) NextStackOffset = std::max(NextStackOffset, (unsigned)16); - unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize(); - - if (MaxCallFrameSize < NextStackOffset) { - MipsFI->setMaxCallFrameSize(NextStackOffset); + // Chain is the output chain of the last Load/Store or CopyToReg node. + // ByValChain is the output chain of the last Memcpy node created for copying + // byval arguments to the stack. + SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); + Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal); - // Set the offsets relative to $sp of the $gp restore slot and dynamically - // allocated stack space. These offsets must be aligned to a boundary - // determined by the stack alignment of the ABI. - unsigned StackAlignment = TFL->getStackAlignment(); - NextStackOffset = (NextStackOffset + StackAlignment - 1) / - StackAlignment * StackAlignment; + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, + IsN64 ? 
Mips::SP_64 : Mips::SP, + getPointerTy()); - MFI->setObjectOffset(DynAllocFI, NextStackOffset); - } + if (MipsFI->getMaxCallFrameSize() < NextStackOffset) + MipsFI->setMaxCallFrameSize(NextStackOffset); // With EABI is it possible to have 16 args on registers. SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; - int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0; - // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue Arg = OutVals[i]; @@ -2727,11 +2673,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); if (IsO32) - WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + WriteByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); else - PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + PassByValArg64(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); continue; @@ -2781,29 +2727,14 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Register can't get to this point... assert(VA.isMemLoc()); - // Create the frame index object for this incoming parameter - LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), true); - SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); - // emit ISD::STORE whichs stores the // parameter value to a stack Location + SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); } - // Extend range of indices of frame objects for outgoing arguments that were - // created during this function call. Skip this step if no such objects were - // created. - if (LastFI) - MipsFI->extendOutArgFIRange(FirstFI, LastFI); - - // If a memcpy has been created to copy a byval arg to a stack, replace the - // chain input of CallSeqStart with ByValChain. - if (InChain != ByValChain) - DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, - NextStackOffsetVal); - // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) @@ -2867,6 +2798,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } + // T9 register operand. + SDValue T9; + // T9 should contain the address of the callee function if // -reloction-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { @@ -2874,7 +2808,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); InFlag = Chain.getValue(1); - Callee = DAG.getRegister(T9Reg, getPointerTy()); + + if (Subtarget->inMips16Mode()) + T9 = DAG.getRegister(T9Reg, getPointerTy()); + else + Callee = DAG.getRegister(T9Reg, getPointerTy()); } // Insert node "GP copy globalreg" before call to function. @@ -2902,7 +2840,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); - Ops.push_back(Subtarget->inMips16Mode()? 
CalleeSave: Callee); + Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. @@ -2910,8 +2848,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - if (Subtarget->inMips16Mode()) - Ops.push_back(Callee); + // Add T9 register operand. + if (T9.getNode()) + Ops.push_back(T9); + // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); @@ -2925,8 +2865,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NextStackOffset, true), + Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index edab03c..95ea8fa 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -132,7 +132,6 @@ namespace llvm { // Lower Operand specifics SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 9654b86..df45df4 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -101,18 +101,18 @@ class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: } // FP indexed load. class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC, - RegisterClass PRC, PatFrag FOp>: + RegisterClass PRC, SDPatternOperator FOp = null_frag>: FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fd, $index($base)"), + !strconcat(opstr, "\t$fd, ${index}(${base})"), [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> { let fs = 0; } // FP indexed store. class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC, - RegisterClass PRC, PatFrag FOp>: + RegisterClass PRC, SDPatternOperator FOp= null_frag>: FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fs, $index($base)"), + !strconcat(opstr, "\t$fs, ${index}(${base})"), [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> { let fd = 0; } @@ -270,7 +270,7 @@ let Predicates = [NotN64, HasStandardEncoding] in { } let Predicates = [NotN64, HasMips64, HasStandardEncoding], - DecoderNamespace = "Mips64" in { + DecoderNamespace = "Mips64" in { def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>; def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>; } @@ -283,9 +283,7 @@ let Predicates = [NotN64, NotMips64, HasStandardEncoding] in { // Indexed loads and stores. 
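In the FPIdxLoad/FPIdxStore classes above, FOp now defaults to null_frag, so a definition can omit its selection pattern entirely; that is what allows the unaligned luxc1/suxc1 variants further down to exist for the assembler only. For the pattern-backed forms, hand construction follows the operand order (outs $fd; ins $base, $index):

// Sketch: "lwxc1 $f0, $t1($t0)" built directly after selection.
BuildMI(MBB, I, DL, TII->get(Mips::LWXC1), Mips::F0)
    .addReg(Mips::T0)    // $base
    .addReg(Mips::T1);   // $index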
let Predicates = [HasMips32r2Or64, HasStandardEncoding] in { def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>; - def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>; def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>; - def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>; } let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in { @@ -301,13 +299,23 @@ let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mip // n64 let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in { def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>; - def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>; def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>; def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>; - def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>; def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>; } +// Load/store doubleword indexed unaligned. +let Predicates = [NotMips64, HasStandardEncoding] in { + def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>; + def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>; +} + +let Predicates = [HasMips64, HasStandardEncoding], + DecoderNamespace="Mips64" in { + def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>; + def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>; +} + /// Floating-point Arithmetic defm FADD : FFR2P_M<0x00, "add", fadd, 1>; defm FDIV : FFR2P_M<0x03, "div", fdiv>; @@ -408,25 +416,23 @@ let Defs=[FCR31] in { //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// -def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src), - "# MOVCCRToCCR", []>; +def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src), + "# MOVCCRToCCR", []>; // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. def BuildPairF64 : - MipsPseudo<(outs AFGR64:$dst), - (ins CPURegs:$lo, CPURegs:$hi), "", - [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; + PseudoSE<(outs AFGR64:$dst), + (ins CPURegs:$lo, CPURegs:$hi), "", + [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; // This pseudo instr gets expanded into 2 mfc1 instrs after register // allocation. // if n is 0, lower part of src is extracted. // if n is 1, higher part of src is extracted. def ExtractElementF64 : - MipsPseudo<(outs CPURegs:$dst), - (ins AFGR64:$src, i32imm:$n), "", - [(set CPURegs:$dst, - (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; + PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "", + [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; //===----------------------------------------------------------------------===// // Floating Point Patterns @@ -466,17 +472,3 @@ let Predicates = [IsFP64bit, HasStandardEncoding] in { def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } - -// Patterns for unaligned floating point loads and stores. 
-let Predicates = [HasMips32r2Or64, NotN64, HasStandardEncoding] in { - def : MipsPat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; - def : MipsPat<(store_u FGR32:$src, CPURegs:$addr), - (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; -} - -let Predicates = [IsN64, HasStandardEncoding] in { - def : MipsPat<(f32 (load_u CPU64Regs:$addr)), - (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; - def : MipsPat<(store_u FGR32:$src, CPU64Regs:$addr), - (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; -} diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 15a77fb..8feb853 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -70,25 +70,35 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, let DecoderNamespace = "Mips"; field bits<32> SoftFail = 0; +} +// Mips32/64 Instruction Format +class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin, Format f>: + MipsInst<outs, ins, asmstr, pattern, itin, f> { let Predicates = [HasStandardEncoding]; - } // Mips Pseudo Instructions Format class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> { + MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> { let isCodeGenOnly = 1; let isPseudo = 1; } +// Mips32/64 Pseudo Instruction Format +class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>: + MipsPseudo<outs, ins, asmstr, pattern> { + let Predicates = [HasStandardEncoding]; +} + //===----------------------------------------------------------------------===// // Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|> //===----------------------------------------------------------------------===// class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst<outs, ins, asmstr, pattern, itin, FrmR> + InstSE<outs, ins, asmstr, pattern, itin, FrmR> { bits<5> rd; bits<5> rs; @@ -111,7 +121,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr, //===----------------------------------------------------------------------===// class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmI> + InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmI> { bits<5> rt; bits<5> rs; @@ -126,7 +136,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, class BranchBase<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst<outs, ins, asmstr, pattern, itin, FrmI> + InstSE<outs, ins, asmstr, pattern, itin, FrmI> { bits<5> rs; bits<5> rt; @@ -144,7 +154,7 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr, //===----------------------------------------------------------------------===// class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmJ> + InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ> { bits<26> addr; @@ -172,7 +182,7 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFR> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR> { bits<5> fd; bits<5> fs; @@ -196,7 +206,7 @@ class FFR<bits<6> op, bits<6> 
_funct, bits<5> _fmt, dag outs, dag ins, //===----------------------------------------------------------------------===// class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFI> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFI> { bits<5> ft; bits<5> base; @@ -214,7 +224,7 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: //===----------------------------------------------------------------------===// class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> fs; bits<5> ft; @@ -235,7 +245,7 @@ class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> rd; bits<5> rs; @@ -256,7 +266,7 @@ class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr, class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> fd; bits<5> fs; @@ -303,7 +313,7 @@ class FFR2P<bits<6> funct, bits<5> fmt, string opstr, // Floating point madd/msub/nmadd/nmsub. class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr, list<dag> pattern> - : MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { + : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> fd; bits<5> fr; bits<5> fs; @@ -321,7 +331,7 @@ class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr, // FP indexed load/store instructions. class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> base; bits<5> index; diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 458e4f7..50e3eb5 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -27,68 +27,19 @@ using namespace llvm; -MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) +MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr) : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), - TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()), - InMips16Mode(TM.getSubtarget<MipsSubtarget>().inMips16Mode()), - RI(*TM.getSubtargetImpl(), *this), - UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {} + TM(tm), UncondBrOpc(UncondBr) {} -const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { - return RI; -} +const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16InstrInfo(TM); -static bool isZeroImm(const MachineOperand &op) { - return op.isImm() && op.getImm() == 0; + return llvm::createMipsSEInstrInfo(TM); } -/// isLoadFromStackSlot - If the specified machine instruction is a direct -/// load from a stack slot, return the virtual or physical register number of -/// the destination along with the FrameIndex of the loaded stack slot. If -/// not, return 0. 
This predicate must return 0 if the instruction has -/// any side effects other than loading from the stack slot. -unsigned MipsInstrInfo:: -isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const -{ - unsigned Opc = MI->getOpcode(); - - if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || - (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || - (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || - (Opc == Mips::LDC164_P8)) { - if ((MI->getOperand(1).isFI()) && // is a stack slot - (MI->getOperand(2).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(2)))) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - } - - return 0; -} - -/// isStoreToStackSlot - If the specified machine instruction is a direct -/// store to a stack slot, return the virtual or physical register number of -/// the source reg along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than storing to the stack slot. -unsigned MipsInstrInfo:: -isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const -{ - unsigned Opc = MI->getOpcode(); - - if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || - (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || - (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || - (Opc == Mips::SDC164_P8)) { - if ((MI->getOperand(1).isFI()) && // is a stack slot - (MI->getOperand(2).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(2)))) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - } - return 0; +bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const { + return op.isImm() && op.getImm() == 0; } /// insertNoop - If data hazard condition is found insert the target nop @@ -100,83 +51,8 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const BuildMI(MBB, MI, DL, get(Mips::NOP)); } -void MipsInstrInfo:: -copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - unsigned Opc = 0, ZeroReg = 0; - - if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. - if (Mips::CPURegsRegClass.contains(SrcReg)) { - if (InMips16Mode) - Opc=Mips::Mov32R16; - else { - Opc = Mips::ADDu, ZeroReg = Mips::ZERO; - } - } - else if (Mips::CCRRegClass.contains(SrcReg)) - Opc = Mips::CFC1; - else if (Mips::FGR32RegClass.contains(SrcReg)) - Opc = Mips::MFC1; - else if (SrcReg == Mips::HI) - Opc = Mips::MFHI, SrcReg = 0; - else if (SrcReg == Mips::LO) - Opc = Mips::MFLO, SrcReg = 0; - } - else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. - if (Mips::CCRRegClass.contains(DestReg)) - Opc = Mips::CTC1; - else if (Mips::FGR32RegClass.contains(DestReg)) - Opc = Mips::MTC1; - else if (DestReg == Mips::HI) - Opc = Mips::MTHI, DestReg = 0; - else if (DestReg == Mips::LO) - Opc = Mips::MTLO, DestReg = 0; - } - else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_S; - else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_D32; - else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_D64; - else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) - Opc = Mips::MOVCCRToCCR; - else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. 
- if (Mips::CPU64RegsRegClass.contains(SrcReg)) - Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; - else if (SrcReg == Mips::HI64) - Opc = Mips::MFHI64, SrcReg = 0; - else if (SrcReg == Mips::LO64) - Opc = Mips::MFLO64, SrcReg = 0; - else if (Mips::FGR64RegClass.contains(SrcReg)) - Opc = Mips::DMFC1; - } - else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. - if (DestReg == Mips::HI64) - Opc = Mips::MTHI64, DestReg = 0; - else if (DestReg == Mips::LO64) - Opc = Mips::MTLO64, DestReg = 0; - else if (Mips::FGR64RegClass.contains(DestReg)) - Opc = Mips::DMTC1; - } - - assert(Opc && "Cannot copy registers"); - - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); - - if (DestReg) - MIB.addReg(DestReg, RegState::Define); - - if (ZeroReg) - MIB.addReg(ZeroReg); - - if (SrcReg) - MIB.addReg(SrcReg, getKillRegState(KillSrc)); -} - -static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI, - unsigned Flag) { +MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, + unsigned Flag) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); @@ -185,130 +61,6 @@ static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI, MFI.getObjectSize(FI), Align); } -void MipsInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); - - unsigned Opc = 0; - - if (Mips::CPURegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SW_P8 : Mips::SW; - else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SD_P8 : Mips::SD; - else if (Mips::FGR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; - else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) - Opc = Mips::SDC1; - else if (Mips::FGR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; - - assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); -} - -void MipsInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); - unsigned Opc = 0; - - if (Mips::CPURegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LW_P8 : Mips::LW; - else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LD_P8 : Mips::LD; - else if (Mips::FGR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; - else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) - Opc = Mips::LDC1; - else if (Mips::FGR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? 
Mips::LDC164_P8 : Mips::LDC164; - - assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) - .addMemOperand(MMO); -} - -void MipsInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned Opc) const { - BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc)) - .addReg(Mips::RA); -} - -void MipsInstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned Opc) const { - BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc)); -} - -void MipsInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetInstrInfo *TII = TM.getInstrInfo(); - unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = I->getOperand(1).getReg(); - unsigned N = I->getOperand(2).getImm(); - const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); - DebugLoc dl = I->getDebugLoc(); - - assert(N < 2 && "Invalid immediate"); - unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven; - unsigned SubReg = TM.getRegisterInfo()->getSubReg(SrcReg, SubIdx); - - BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); -} - -void MipsInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetInstrInfo *TII = TM.getInstrInfo(); - unsigned DstReg = I->getOperand(0).getReg(); - unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); - const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); - DebugLoc dl = I->getDebugLoc(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - // mtc1 Lo, $fp - // mtc1 Hi, $fp + 1 - BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpeven)) - .addReg(LoReg); - BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpodd)) - .addReg(HiReg); -} - -bool MipsInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - MachineBasicBlock &MBB = *MI->getParent(); - - switch(MI->getDesc().getOpcode()) { - default: - return false; - case Mips::RetRA: - ExpandRetRA(MBB, MI, Mips::RET); - break; - case Mips::RetRA16: - ExpandRetRA16(MBB, MI, Mips::JrRa16); - break; - case Mips::BuildPairF64: - ExpandBuildPairF64(MBB, MI); - break; - case Mips::ExtractElementF64: - ExpandExtractElementF64(MBB, MI); - break; - } - - MBB.erase(MI); - return true; -} - MachineInstr* MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *MDPtr, @@ -322,42 +74,9 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, // Branch Analysis //===----------------------------------------------------------------------===// -static unsigned GetAnalyzableBrOpc(unsigned Opc) { - return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || - Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || - Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || - Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || - Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || - Opc == Mips::J) ? - Opc : 0; -} - -/// GetOppositeBranchOpc - Return the inverse of the specified -/// opcode, e.g. turning BEQ to BNE. 
-unsigned Mips::GetOppositeBranchOpc(unsigned Opc) -{ - switch (Opc) { - default: llvm_unreachable("Illegal opcode!"); - case Mips::BEQ: return Mips::BNE; - case Mips::BNE: return Mips::BEQ; - case Mips::BGTZ: return Mips::BLEZ; - case Mips::BGEZ: return Mips::BLTZ; - case Mips::BLTZ: return Mips::BGEZ; - case Mips::BLEZ: return Mips::BGTZ; - case Mips::BEQ64: return Mips::BNE64; - case Mips::BNE64: return Mips::BEQ64; - case Mips::BGTZ64: return Mips::BLEZ64; - case Mips::BGEZ64: return Mips::BLTZ64; - case Mips::BLTZ64: return Mips::BGEZ64; - case Mips::BLEZ64: return Mips::BGTZ64; - case Mips::BC1T: return Mips::BC1F; - case Mips::BC1F: return Mips::BC1T; - } -} - -static void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, - MachineBasicBlock *&BB, - SmallVectorImpl<MachineOperand> &Cond) { +void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl<MachineOperand> &Cond) const { assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); @@ -527,7 +246,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert( (Cond.size() && Cond.size() <= 3) && "Invalid Mips branch condition!"); - Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm())); + Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm())); return false; } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 358f817..7d56259 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -26,99 +26,69 @@ namespace llvm { class MipsInstrInfo : public MipsGenInstrInfo { +protected: MipsTargetMachine &TM; - bool IsN64; bool InMips16Mode; - const MipsRegisterInfo RI; unsigned UncondBrOpc; + public: - explicit MipsInstrInfo(MipsTargetMachine &TM); + explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc); - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - virtual const MipsRegisterInfo &getRegisterInfo() const; - - /// isLoadFromStackSlot - If the specified machine instruction is a direct - /// load from a stack slot, return the virtual or physical register number of - /// the destination along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - /// isStoreToStackSlot - If the specified machine instruction is a direct - /// store to a stack slot, return the virtual or physical register number of - /// the source reg along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than storing to the stack slot. 
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + static const MipsInstrInfo *create(MipsTargetMachine &TM); /// Branch Analysis virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - -private: - void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned Opc) const; - void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned Opc) const; - void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, - const SmallVectorImpl<MachineOperand>& Cond) const; - void ExpandExtractElementF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void ExpandBuildPairF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; -public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *MDPtr, DebugLoc DL) const; - virtual - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const MipsRegisterInfo &getRegisterInfo() const = 0; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0; + /// Return the number of bytes of code the specified instruction may be. unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + +protected: + bool isZeroImm(const MachineOperand &op) const; + + MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI, + unsigned Flag) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0; + + void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl<MachineOperand> &Cond) const; + + void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, + const SmallVectorImpl<MachineOperand>& Cond) const; }; namespace Mips { - /// GetOppositeBranchOpc - Return the inverse of the specified - /// opcode, e.g. turning BEQ to BNE. - unsigned GetOppositeBranchOpc(unsigned Opc); - /// Emit a series of instructions to load an immediate. 
All instructions /// except for the last one are emitted. The function returns the number of /// MachineInstrs generated. The opcode-immediate pair of the last @@ -130,6 +100,10 @@ namespace Mips { MipsAnalyzeImmediate::Inst *LastInst); } +/// Create MipsInstrInfo objects. +const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM); +const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM); + } #endif diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index f1aada4..fd952ef 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -208,17 +208,24 @@ def uimm16 : Operand<i32> { let PrintMethod = "printUnsignedImm"; } +def MipsMemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let ParserMethod = "parseMemOperand"; +} + // Address operand def mem : Operand<i32> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemAsmOperand; } def mem64 : Operand<i64> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemAsmOperand; } def mem_ea : Operand<i32> { @@ -722,9 +729,11 @@ class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC, let neverHasSideEffects = 1; } -class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> : - FMem<0x09, (outs RC:$rt), (ins Mem:$addr), - instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>; +class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> : + FMem<opc, (outs RC:$rt), (ins Mem:$addr), + instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> { + let isCodeGenOnly = 1; +} // Count Leading Ones/Zeros in Word class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>: @@ -803,9 +812,9 @@ class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>: // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC, RegisterClass PRC> : - MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), - !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; + PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), + !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; multiclass Atomic2Ops32<PatFrag Op, string Opstr> { def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, @@ -819,9 +828,9 @@ multiclass Atomic2Ops32<PatFrag Op, string Opstr> { // Atomic Compare & Swap. class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC, RegisterClass PRC> : - MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), - !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; + PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), + !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; multiclass AtomicCmpSwap32<PatFrag Op, string Width> { def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, @@ -851,14 +860,13 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : // Return RA. 
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in -def RetRA : MipsPseudo<(outs), (ins), "", [(MipsRet)]>; +def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>; -// As stack alignment is always done with addiu, we need a 16-bit immediate -let Defs = [SP], Uses = [SP] in { -def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt), +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { +def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt), "!ADJCALLSTACKDOWN $amt", [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), +def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), "!ADJCALLSTACKUP $amt1", [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -868,8 +876,8 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), // are used, we have the same behavior, but also get a bunch of warnings // from the assembler. let neverHasSideEffects = 1 in -def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp), - ".cprestore\t$loc", []>; +def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp), + ".cprestore\t$loc", []>; let usesCustomInserter = 1 in { defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">; @@ -969,8 +977,8 @@ defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>; defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>; let hasSideEffects = 1 in -def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", - [(MipsSync imm:$stype)], NoItinerary, FrmOther> +def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype", + [(MipsSync imm:$stype)], NoItinerary, FrmOther> { bits<5> stype; let Opcode = 0; @@ -1046,17 +1054,13 @@ let addr=0 in // instructions. The same does not happen for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { - let isCodeGenOnly = 1; -} +def LEA_ADDiu : EffectiveAddress<0x09, "addiu\t$rt, $addr", CPURegs, mem_ea>; // DynAlloc node points to dynamically allocated stack space. // $sp is added to the list of implicitly used registers to prevent dead code // elimination from removing instructions that modify $sp. let Uses = [SP] in -def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { - let isCodeGenOnly = 1; -} +def DynAlloc : EffectiveAddress<0x09, "addiu\t$rt, $addr", CPURegs, mem_ea>; // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index 150bdbb..052046a 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -27,7 +27,52 @@ using namespace llvm; void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + unsigned NewAddr = (intptr_t)New; + unsigned OldAddr = (intptr_t)Old; + const unsigned NopInstr = 0x0; + + // If the functions are in the same memory segment, insert PC-region branch. + if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) { + unsigned *OldInstruction = (unsigned *)Old; + *OldInstruction = 0x08000000; + unsigned JTargetAddr = NewAddr & 0x0FFFFFFC; + + JTargetAddr >>= 2; + *OldInstruction |= JTargetAddr; + + // Insert a NOP. 
+ OldInstruction++; + *OldInstruction = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 2 * 4); + } else { + // We need to clear hint bits from the instruction, in case it is 'jr ra'. + const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008; + unsigned* CurrentInstr = (unsigned*)Old; + unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask; + unsigned* NextInstr = CurrentInstr + 1; + unsigned NextInstrHintClear = (*NextInstr) & HintMask; + + // Do absolute jump if there are 2 or more instructions before return from + // the old function. + if ((CurrInstrHintClear != ReturnSequence) && + (NextInstrHintClear != ReturnSequence)) { + const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000; + const unsigned JrT0Instr = 0x01000008; + // lui t0, high 16 bit of the NewAddr + (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16); + // addiu t0, t0, low 16 bit of the NewAddr + (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff); + // jr t0 + (*(CurrentInstr++)) = JrT0Instr; + (*CurrentInstr) = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 4 * 4); + } else { + // Unsupported case + report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + } + } } /// JITCompilerFunction - This contains the address of the JIT function used to diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 70ecbc1..f78203f 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -207,7 +207,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) { // MachineBasicBlock operand MBBOpnd. void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL, MachineBasicBlock *MBBOpnd) { - unsigned NewOpc = Mips::GetOppositeBranchOpc(Br->getOpcode()); + unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode()); const MCInstrDesc &NewDesc = TII->get(NewOpc); MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index b2232c6..df3c4c0 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -48,8 +48,6 @@ class MipsFunctionInfo : public MachineFunctionInfo { // OutArgFIRange: Range of indices of all frame objects created during call to // LowerCall except for the frame object for restoring $gp. std::pair<int, int> InArgFIRange, OutArgFIRange; - int GlobalRegFI; - mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; bool EmitNOAT; @@ -58,8 +56,7 @@ public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), GlobalRegFI(0), DynAllocFI(0), - MaxCallFrameSize(0), EmitNOAT(false) + OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false) {} bool isInArgFI(int FI) const { @@ -77,34 +74,6 @@ public: OutArgFIRange.second = LastFI; } - bool isGlobalRegFI(int FI) const { - return GlobalRegFI && (FI == GlobalRegFI); - } - - int getGlobalRegFI() const { - return GlobalRegFI; - } - - int initGlobalRegFI() { - const TargetMachine &TM = MF.getTarget(); - unsigned RegSize = TM.getSubtarget<MipsSubtarget>().isABI_N64() ? 
8 : 4; - int64_t StackAlignment = TM.getFrameLowering()->getStackAlignment(); - uint64_t Offset = RoundUpToAlignment(MaxCallFrameSize, StackAlignment); - - GlobalRegFI = MF.getFrameInfo()->CreateFixedObject(RegSize, Offset, true); - return GlobalRegFI; - } - - // The first call to this function creates a frame object for dynamically - // allocated stack area. - int getDynAllocFI() const { - if (!DynAllocFI) - DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true); - - return DynAllocFI; - } - bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; } - unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index a3ce236..ae6ae3a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -144,15 +144,6 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { return true; } -// This function eliminate ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void MipsRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - // FrameIndex represents objects inside an abstract stack. // We must replace FrameIndex with a stack/frame pointer // direct reference. @@ -161,8 +152,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned i = 0; while (!MI.getOperand(i).isFI()) { @@ -182,68 +171,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, << "spOffset : " << spOffset << "\n" << "stackSize : " << stackSize << "\n"); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The following stack frame objects are always referenced relative to $sp: - // 1. Outgoing arguments. - // 2. Pointer to dynamically allocated stack space. - // 3. Locations for callee-saved registers. - // Everything else is referenced relative to whatever register - // getFrameRegister() returns. - unsigned FrameReg; - - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) - FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; - else - FrameReg = getFrameRegister(MF); - - // Calculate final offset. - // - There is no need to change the offset if the frame object is one of the - // following: an outgoing argument, pointer to a dynamically allocated - // stack space or a $gp restore location, - // - If the frame object is any of the following, its offset must be adjusted - // by adding the size of the stack: - // incoming argument, callee-saved register location or local variable. 
- int64_t Offset; - - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || - MipsFI->isGlobalRegFI(FrameIndex)) - Offset = spOffset; - else - Offset = spOffset + (int64_t)stackSize; - - Offset += MI.getOperand(i+1).getImm(); - - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - - // If MI is not a debug value, make sure Offset fits in the 16-bit immediate - // field. - if (!MI.isDebugValue() && !isInt<16>(Offset)) { - MachineBasicBlock &MBB = *MI.getParent(); - DebugLoc DL = II->getDebugLoc(); - unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; - MipsAnalyzeImmediate::Inst LastInst(0, 0); - - MipsFI->setEmitNOAT(); - Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, - &LastInst); - BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); - - FrameReg = ATReg; - Offset = SignExtend64<16>(LastInst.ImmOpnd); - } - - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + eliminateFI(MI, i, FrameIndex, stackSize, spOffset); } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index f320bae..9a05e94 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -25,10 +25,12 @@ class MipsSubtarget; class TargetInstrInfo; class Type; -struct MipsRegisterInfo : public MipsGenRegisterInfo { +class MipsRegisterInfo : public MipsGenRegisterInfo { +protected: const MipsSubtarget &Subtarget; const TargetInstrInfo &TII; +public: MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii); /// getRegisterNumbering - Given the enum value for some register, e.g. @@ -51,10 +53,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; @@ -67,6 +65,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { /// Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; + +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const = 0; }; } // end namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index b255e42..4015add 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -239,6 +239,9 @@ let Namespace = "Mips" in { // fcc0 register def FCC0 : Register<"fcc0">; + // PC register + def PC : Register<"pc">; + // Hardware register $29 def HWR29 : Register<"29">; def HWR29_64 : Register<"29">; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp new file mode 100644 index 0000000..1c59847 --- /dev/null +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -0,0 +1,210 @@ +//===-- MipsSEFrameLowering.cpp - Mips32/64 Frame Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "MipsSEFrameLowering.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSEInstrInfo.h" +#include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsRegisterInfo *RegInfo = + static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const MipsSEInstrInfo &TII = + *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + + // First, compute final stack size. + uint64_t StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack()) return; + + MachineModuleInfo &MMI = MF.getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + MachineLocation DstML, SrcML; + + // Adjust stack. + TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); + + // emit ".cfi_def_cfa_offset StackSize" + MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); + DstML = MachineLocation(MachineLocation::VirtualFP); + SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); + Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + if (CSI.size()) { + // Find the instruction past the last instruction that saves a callee-saved + // register to the stack. + for (unsigned i = 0; i < CSI.size(); ++i) + ++MBBI; + + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. + MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); + + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + + // If Reg is a double precision register, emit two cfa_offsets, + // one for each of the paired single precision registers. 
+ if (Mips::AFGR64RegClass.contains(Reg)) { + MachineLocation DstML0(MachineLocation::VirtualFP, Offset); + MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); + MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven)); + MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd)); + + if (!STI.isLittle()) + std::swap(SrcML0, SrcML1); + + Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); + Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); + } else { + // Reg is either in CPURegs or FGR32. + DstML = MachineLocation(MachineLocation::VirtualFP, Offset); + SrcML = MachineLocation(Reg); + Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); + } + } + } + + // if framepointer enabled, set it to point to the stack pointer. + if (hasFP(MF)) { + // Insert instruction "move $fp, $sp" at this location. + BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); + + // emit ".cfi_def_cfa_register $fp" + MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); + DstML = MachineLocation(FP); + SrcML = MachineLocation(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); + } +} + +void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsSEInstrInfo &TII = + *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + + // if framepointer enabled, restore the stack pointer. + if (hasFP(MF)) { + // Find the first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + + for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) + --I; + + // Insert instruction "move $sp, $fp" at this location. + BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); + } + + // Get the number of bytes from FrameInfo + uint64_t StackSize = MFI->getStackSize(); + + if (!StackSize) + return; + + // Adjust stack. + TII.adjustStackPtr(SP, StackSize, MBB, MBBI); +} + +bool MipsSEFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + MachineFunction *MF = MBB.getParent(); + MachineBasicBlock *EntryBlock = MF->begin(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. Do not add if the register is + // RA and return address is taken, because it has already been added in + // method MipsTargetLowering::LowerRETURNADDR. + // It's killed at the spill, unless the register is RA and return address + // is taken. + unsigned Reg = CSI[i].getReg(); + bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64) + && MF->getFrameInfo()->isReturnAddressTaken(); + if (!IsRAAndRetAddrIsTaken) + EntryBlock->addLiveIn(Reg); + + // Insert the spill to the stack frame. 
+ bool IsKill = !IsRAAndRetAddrIsTaken; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill, + CSI[i].getFrameIdx(), RC, TRI); + } + + return true; +} + +bool +MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Reserve call frame if the size of the maximum call frame fits into 16-bit + // immediate field and there are no variable sized objects on the stack. + return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); +} + +void MipsSEFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + + // Mark $fp as used if function has dedicated frame pointer. + if (hasFP(MF)) + MRI.setPhysRegUsed(FP); +} + +const MipsFrameLowering * +llvm::createMipsSEFrameLowering(const MipsSubtarget &ST) { + return new MipsSEFrameLowering(ST); +} diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h new file mode 100644 index 0000000..6481a0a --- /dev/null +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -0,0 +1,44 @@ +//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSE_FRAMEINFO_H +#define MIPSSE_FRAMEINFO_H + +#include "MipsFrameLowering.h" + +namespace llvm { + +class MipsSEFrameLowering : public MipsFrameLowering { +public: + explicit MipsSEFrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI) {} + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp new file mode 100644 index 0000000..eeb1de3 --- /dev/null +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -0,0 +1,320 @@ +//===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsSEInstrInfo.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm) + : MipsInstrInfo(tm, + tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J), + RI(*tm.getSubtargetImpl(), *this), + IsN64(tm.getSubtarget<MipsSubtarget>().isABI_N64()) {} + +const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { + return RI; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned MipsSEInstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || + (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || + (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || + (Opc == Mips::LDC164_P8)) { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned MipsSEInstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || + (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || + (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || + (Opc == Mips::SDC164_P8)) { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0, ZeroReg = 0; + + if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. + if (Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::ADDu, ZeroReg = Mips::ZERO; + else if (Mips::CCRRegClass.contains(SrcReg)) + Opc = Mips::CFC1; + else if (Mips::FGR32RegClass.contains(SrcReg)) + Opc = Mips::MFC1; + else if (SrcReg == Mips::HI) + Opc = Mips::MFHI, SrcReg = 0; + else if (SrcReg == Mips::LO) + Opc = Mips::MFLO, SrcReg = 0; + } + else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. 
+ if (Mips::CCRRegClass.contains(DestReg)) + Opc = Mips::CTC1; + else if (Mips::FGR32RegClass.contains(DestReg)) + Opc = Mips::MTC1; + else if (DestReg == Mips::HI) + Opc = Mips::MTHI, DestReg = 0; + else if (DestReg == Mips::LO) + Opc = Mips::MTLO, DestReg = 0; + } + else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_S; + else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D32; + else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D64; + else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) + Opc = Mips::MOVCCRToCCR; + else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. + if (Mips::CPU64RegsRegClass.contains(SrcReg)) + Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; + else if (SrcReg == Mips::HI64) + Opc = Mips::MFHI64, SrcReg = 0; + else if (SrcReg == Mips::LO64) + Opc = Mips::MFLO64, SrcReg = 0; + else if (Mips::FGR64RegClass.contains(SrcReg)) + Opc = Mips::DMFC1; + } + else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. + if (DestReg == Mips::HI64) + Opc = Mips::MTHI64, DestReg = 0; + else if (DestReg == Mips::LO64) + Opc = Mips::MTLO64, DestReg = 0; + else if (Mips::FGR64RegClass.contains(DestReg)) + Opc = Mips::DMTC1; + } + + assert(Opc && "Cannot copy registers"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); + + if (DestReg) + MIB.addReg(DestReg, RegState::Define); + + if (ZeroReg) + MIB.addReg(ZeroReg); + + if (SrcReg) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); +} + +void MipsSEInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); + + unsigned Opc = 0; + + if (Mips::CPURegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SW_P8 : Mips::SW; + else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SD_P8 : Mips::SD; + else if (Mips::FGR32RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; + else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) + Opc = Mips::SDC1; + else if (Mips::FGR64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); +} + +void MipsSEInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const +{ + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); + unsigned Opc = 0; + + if (Mips::CPURegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LW_P8 : Mips::LW; + else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LD_P8 : Mips::LD; + else if (Mips::FGR32RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; + else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) + Opc = Mips::LDC1; + else if (Mips::FGR64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? 
Mips::LDC164_P8 : Mips::LDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); +} + +bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineBasicBlock &MBB = *MI->getParent(); + + switch(MI->getDesc().getOpcode()) { + default: + return false; + case Mips::RetRA: + ExpandRetRA(MBB, MI, Mips::RET); + break; + case Mips::BuildPairF64: + ExpandBuildPairF64(MBB, MI); + break; + case Mips::ExtractElementF64: + ExpandExtractElementF64(MBB, MI); + break; + } + + MBB.erase(MI); + return true; +} + +/// GetOppositeBranchOpc - Return the inverse of the specified +/// opcode, e.g. turning BEQ to BNE. +unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const { + switch (Opc) { + default: llvm_unreachable("Illegal opcode!"); + case Mips::BEQ: return Mips::BNE; + case Mips::BNE: return Mips::BEQ; + case Mips::BGTZ: return Mips::BLEZ; + case Mips::BGEZ: return Mips::BLTZ; + case Mips::BLTZ: return Mips::BGEZ; + case Mips::BLEZ: return Mips::BGTZ; + case Mips::BEQ64: return Mips::BNE64; + case Mips::BNE64: return Mips::BEQ64; + case Mips::BGTZ64: return Mips::BLEZ64; + case Mips::BGEZ64: return Mips::BLTZ64; + case Mips::BLTZ64: return Mips::BGEZ64; + case Mips::BLEZ64: return Mips::BGTZ64; + case Mips::BC1T: return Mips::BC1F; + case Mips::BC1F: return Mips::BC1T; + } +} + +/// Adjust SP by Amount bytes. +void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + + if (isInt<16>(Amount))// addi sp, sp, amount + BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); + else { // Expand immediate that doesn't fit in 16-bit. + unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; + + MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT(); + Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0); + BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg); + } +} + +unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { + return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || + Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || + Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || + Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || + Opc == Mips::J) ? + Opc : 0; +} + +void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Opc) const { + BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA); +} + +void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned SrcReg = I->getOperand(1).getReg(); + unsigned N = I->getOperand(2).getImm(); + const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1); + DebugLoc dl = I->getDebugLoc(); + + assert(N < 2 && "Invalid immediate"); + unsigned SubIdx = N ? 
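// An AFGR64 value occupies an even/odd pair of 32-bit FPRs, so element N of
// the pair is simply the sub_fpeven (N == 0) or sub_fpodd (N == 1)
// subregister, read back into a GPR with mfc1; e.g. for a value living in
// $f0/$f1, extracting element 1 emits "mfc1 $dst, $f1".
// Likewise, adjustStackPtr above folds a small adjustment into a single
// addiu but needs AT as scratch otherwise; a sketch, assuming O32 and
// Amount == -70000 (the exact sequence is chosen by Mips::loadImmediate):
//   lui  $at, 0xfffe
//   ori  $at, $at, 0xee90
//   addu $sp, $sp, $at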
Mips::sub_fpodd : Mips::sub_fpeven; + unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); + + BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); +} + +void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); + const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); + DebugLoc dl = I->getDebugLoc(); + const TargetRegisterInfo &TRI = getRegisterInfo(); + + // mtc1 Lo, $fp + // mtc1 Hi, $fp + 1 + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven)) + .addReg(LoReg); + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd)) + .addReg(HiReg); +} + +const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) { + return new MipsSEInstrInfo(TM); +} diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h new file mode 100644 index 0000000..346e74d --- /dev/null +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -0,0 +1,86 @@ +//===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEINSTRUCTIONINFO_H +#define MIPSSEINSTRUCTIONINFO_H + +#include "MipsInstrInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSERegisterInfo.h" + +namespace llvm { + +class MipsSEInstrInfo : public MipsInstrInfo { + const MipsSERegisterInfo RI; + bool IsN64; + +public: + explicit MipsSEInstrInfo(MipsTargetMachine &TM); + + virtual const MipsRegisterInfo &getRegisterInfo() const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + + /// Adjust SP by Amount bytes. + void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + + void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned Opc) const; + void ExpandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + void ExpandBuildPairF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; +}; + +} + +#endif diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp new file mode 100644 index 0000000..043a1ef --- /dev/null +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -0,0 +1,138 @@ +//===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information -== -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MIPS32/64 implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsSERegisterInfo.h" +#include "Mips.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSEInstrInfo.h" +#include "MipsSubtarget.h" +#include "MipsMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST, + const TargetInstrInfo &TII) + : MipsRegisterInfo(ST, TII) {} + +// This function eliminate ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void MipsSERegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!TFI->hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII); + unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + + II->adjustStackPtr(SP, Amount, MBB, I); + } + + MBB.erase(I); +} + +void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, + unsigned OpNo, int FrameIndex, + uint64_t StackSize, + int64_t SPOffset) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (MipsFI->isOutArgFI(FrameIndex) || + (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + else + FrameReg = getFrameRegister(MF); + + // Calculate final offset. + // - There is no need to change the offset if the frame object is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. 
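// Worked example of the bias rule (illustrative numbers): in a frame with
// StackSize == 40, a local variable placed at SPOffset -8 is rewritten to
// 40 + (-8) = 32 relative to the frame register (before the instruction's
// own immediate offset is folded in), whereas an outgoing-argument slot at
// SPOffset 16 stays at 16($sp), since outgoing arguments sit at the bottom
// of the frame and need no StackSize bias.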
+ int64_t Offset; + + if (MipsFI->isOutArgFI(FrameIndex)) + Offset = SPOffset; + else + Offset = SPOffset + (int64_t)StackSize; + + Offset += MI.getOperand(OpNo + 1).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + // If MI is not a debug value, make sure Offset fits in the 16-bit immediate + // field. + if (!MI.isDebugValue() && !isInt<16>(Offset)) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = II->getDebugLoc(); + unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; + MipsAnalyzeImmediate::Inst LastInst(0, 0); + + MipsFI->setEmitNOAT(); + Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, + &LastInst); + BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); + + FrameReg = ATReg; + Offset = SignExtend64<16>(LastInst.ImmOpnd); + } + + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); +} diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h new file mode 100644 index 0000000..4b17b33 --- /dev/null +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -0,0 +1,39 @@ +//===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEREGISTERINFO_H +#define MIPSSEREGISTERINFO_H + +#include "MipsRegisterInfo.h" + +namespace llvm { + +class MipsSERegisterInfo : public MipsRegisterInfo { +public: + MipsSERegisterInfo(const MipsSubtarget &Subtarget, + const TargetInstrInfo &TII); + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 3215c44..ba15362 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -89,6 +89,9 @@ protected: // InMips16 -- can process Mips16 instructions bool InMips16Mode; + // IsAndroid -- target is android + bool IsAndroid; + InstrItineraryData InstrItins; public: @@ -128,6 +131,7 @@ public: bool isNotSingleFloat() const { return !IsSingleFloat; } bool hasVFPU() const { return HasVFPU; } bool inMips16Mode() const { return InMips16Mode; } + bool isAndroid() const { return IsAndroid; } bool isLinux() const { return IsLinux; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index dd5d35f..2928a73 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -13,6 +13,8 @@ #include "MipsTargetMachine.h" #include "Mips.h" +#include "MipsFrameLowering.h" +#include "MipsInstrInfo.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" @@ -22,8 +24,8 @@ extern "C" void LLVMInitializeMipsTarget() { // Register the 
target. RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget); RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget); - RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target); - RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget); + RegisterTargetMachine<MipsebTargetMachine> A(TheMips64Target); + RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget); } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -48,9 +50,10 @@ MipsTargetMachine(const Target &T, StringRef TT, (Subtarget.isABI_N64() ? "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), - InstrInfo(*this), - FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), JITInfo() { + InstrInfo(MipsInstrInfo::create(*this)), + FrameLowering(MipsFrameLowering::create(*this, Subtarget)), + TLInfo(*this), TSInfo(*this), JITInfo(), + ELFWriterInfo(false, isLittle) { } void MipsebTargetMachine::anchor() { } @@ -71,24 +74,6 @@ MipselTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} -void Mips64ebTargetMachine::anchor() { } - -Mips64ebTargetMachine:: -Mips64ebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} - -void Mips64elTargetMachine::anchor() { } - -Mips64elTargetMachine:: -Mips64elTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - namespace { /// Mips Code Generator Pass Configuration Options. 
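// The registration change above folds the 64-bit targets into the
// Mipseb/Mipsel machine classes, and the constructor now obtains InstrInfo
// and FrameLowering through factories so the subtarget can pick the
// concrete variant. A minimal sketch of such a factory, assuming a Mips16
// counterpart to the SE variant (hypothetical name):
//   const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) {
//     if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
//       return createMips16InstrInfo(TM);  // hypothetical Mips16 factory
//     return createMipsSEInstrInfo(TM);    // defined earlier in this patch
//   }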
class MipsPassConfig : public TargetPassConfig { diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 5cbf057..a542ef6 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -20,59 +20,67 @@ #include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" +#include "MipsELFWriterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class formatted_raw_ostream; - - class MipsTargetMachine : public LLVMTargetMachine { - MipsSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - MipsInstrInfo InstrInfo; - MipsFrameLowering FrameLowering; - MipsTargetLowering TLInfo; - MipsSelectionDAGInfo TSInfo; - MipsJITInfo JITInfo; - - public: - MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle); - - virtual const MipsInstrInfo *getInstrInfo() const - { return &InstrInfo; } - virtual const TargetFrameLowering *getFrameLowering() const - { return &FrameLowering; } - virtual const MipsSubtarget *getSubtargetImpl() const - { return &Subtarget; } - virtual const TargetData *getTargetData() const - { return &DataLayout;} - virtual MipsJITInfo *getJITInfo() - { return &JITInfo; } - - - virtual const MipsRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - - virtual const MipsTargetLowering *getTargetLowering() const { - return &TLInfo; - } - - virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); - }; - -/// MipsebTargetMachine - Mips32 big endian target machine. 
+class formatted_raw_ostream; +class MipsRegisterInfo; + +class MipsTargetMachine : public LLVMTargetMachine { + MipsSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + const MipsInstrInfo *InstrInfo; + const MipsFrameLowering *FrameLowering; + MipsTargetLowering TLInfo; + MipsSelectionDAGInfo TSInfo; + MipsJITInfo JITInfo; + MipsELFWriterInfo ELFWriterInfo; + +public: + MipsTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, + bool isLittle); + + virtual ~MipsTargetMachine() { delete InstrInfo; } + + virtual const MipsInstrInfo *getInstrInfo() const + { return InstrInfo; } + virtual const TargetFrameLowering *getFrameLowering() const + { return FrameLowering; } + virtual const MipsSubtarget *getSubtargetImpl() const + { return &Subtarget; } + virtual const TargetData *getTargetData() const + { return &DataLayout;} + virtual MipsJITInfo *getJITInfo() + { return &JITInfo; } + + virtual const MipsRegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + + virtual const MipsTargetLowering *getTargetLowering() const { + return &TLInfo; + } + + virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + + virtual const MipsELFWriterInfo *getELFWriterInfo() const { + return &ELFWriterInfo; + } + + // Pass Pipeline Configuration + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); +}; + +/// MipsebTargetMachine - Mips32/64 big endian target machine. /// class MipsebTargetMachine : public MipsTargetMachine { virtual void anchor(); @@ -83,7 +91,7 @@ public: CodeGenOpt::Level OL); }; -/// MipselTargetMachine - Mips32 little endian target machine. +/// MipselTargetMachine - Mips32/64 little endian target machine. /// class MipselTargetMachine : public MipsTargetMachine { virtual void anchor(); @@ -94,29 +102,6 @@ public: CodeGenOpt::Level OL); }; -/// Mips64ebTargetMachine - Mips64 big endian target machine. -/// -class Mips64ebTargetMachine : public MipsTargetMachine { - virtual void anchor(); -public: - Mips64ebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -/// Mips64elTargetMachine - Mips64 little endian target machine. -/// -class Mips64elTargetMachine : public MipsTargetMachine { - virtual void anchor(); -public: - Mips64elTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; } // End llvm namespace #endif diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index f50f9b5..2a2abb1 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -337,7 +337,10 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, // can get a useful trip count. The trip count can // be either a register or an immediate. The location // of the value depends upon the type (reg or imm). 
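// The rewrite below drops MachineOperand's intrusive
// getNextOperandForReg() chaining in favor of an explicit walk of the
// register's use/def list, which MachineRegisterInfo owns: each
// reg_iterator position names one operand reading or writing the induction
// variable's register, and getOperand()/getParent() recover the operand and
// its enclosing instruction.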
- while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + for (MachineRegisterInfo::reg_iterator + RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); + RI != RE; ++RI) { + IV_Opnd = &RI.getOperand(); bool SignedCmp; MachineInstr *MI = IV_Opnd->getParent(); if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 13250b3..61d44c5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -106,7 +106,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); - // We do not currently implment this libm ops for PowerPC. + // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); @@ -394,8 +394,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - if (Subtarget->has64BitSupport()) + if (Subtarget->has64BitSupport()) { setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + } setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 91c5366..39778a5 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -265,6 +265,15 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), PPC970_DGroup_First, PPC970_Unit_FXU; } +let Pattern = [(set G8RC:$rT, readcyclecounter)] in +def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), + "mfspr $rT, 268", SprMFTB>, + PPC970_DGroup_First, PPC970_Unit_FXU; +// Note that encoding mftb using mfspr is now the preferred form, +// and has been since at least ISA v2.03. The mftb instruction has +// now been phased out. Using mfspr, however, is known not to work on +// the POWER3. + let Defs = [X1], Uses = [X1] in def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"", [(set G8RC:$result, diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile index a101aa4..2d0560d 100644 --- a/lib/Target/PowerPC/TargetInfo/Makefile +++ b/lib/Target/PowerPC/TargetInfo/Makefile @@ -10,6 +10,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMPowerPCInfo # Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. +override CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common diff --git a/lib/Target/README.txt b/lib/Target/README.txt index cbfa4cf..9c27f27 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2367,8 +2367,3 @@ unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; } should fold to x > y. //===---------------------------------------------------------------------===// - -int f(double x) { return __builtin_fabs(x) < 0.0; } -should fold to false. 
- -//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 6357468..ff8d3c5 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -109,9 +109,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -void SparcRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const {} - unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index ec95ad4..8e215a7 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -24,64 +24,72 @@ void TargetLibraryInfo::anchor() { } const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { + "__cxa_atexit", + "__cxa_guard_abort", + "__cxa_guard_acquire", + "__cxa_guard_release", + "__memcpy_chk", "acos", - "acosl", "acosf", + "acosl", "asin", - "asinl", "asinf", + "asinl", "atan", - "atanl", - "atanf", "atan2", - "atan2l", "atan2f", + "atan2l", + "atanf", + "atanl", "ceil", - "ceill", "ceilf", + "ceill", "copysign", "copysignf", "copysignl", "cos", - "cosl", "cosf", "cosh", - "coshl", "coshf", + "coshl", + "cosl", "exp", - "expl", - "expf", "exp2", - "exp2l", "exp2f", + "exp2l", + "expf", + "expl", "expm1", - "expm1l", "expm1f", + "expm1l", "fabs", - "fabsl", "fabsf", + "fabsl", + "fiprintf", "floor", - "floorl", "floorf", - "fiprintf", + "floorl", "fmod", - "fmodl", "fmodf", + "fmodl", + "fputc", "fputs", "fwrite", "iprintf", "log", - "logl", - "logf", - "log2", - "log2l", - "log2f", "log10", - "log10l", "log10f", + "log10l", "log1p", - "log1pl", "log1pf", + "log1pl", + "log2", + "log2f", + "log2l", + "logf", + "logl", + "memchr", + "memcmp", "memcpy", "memmove", "memset", @@ -92,6 +100,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "pow", "powf", "powl", + "putchar", + "puts", "rint", "rintf", "rintl", @@ -99,36 +109,48 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "roundf", "roundl", "sin", - "sinl", "sinf", "sinh", - "sinhl", "sinhf", + "sinhl", + "sinl", "siprintf", "sqrt", - "sqrtl", "sqrtf", + "sqrtl", + "strcat", + "strchr", + "strcpy", + "strlen", + "strncat", + "strncmp", + "strncpy", + "strnlen", "tan", - "tanl", "tanf", "tanh", - "tanhl", "tanhf", + "tanhl", + "tanl", "trunc", "truncf", - "truncl", - "__cxa_atexit", - "__cxa_guard_abort", - "__cxa_guard_acquire", - "__cxa_guard_release" + "truncl" }; /// initialize - Initialize the set of available library functions based on the /// specified target triple. This should be carefully written so that a missing /// target triple gets a sane set of defaults. -static void initialize(TargetLibraryInfo &TLI, const Triple &T) { +static void initialize(TargetLibraryInfo &TLI, const Triple &T, + const char **StandardNames) { initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); +#ifndef NDEBUG + // Verify that the StandardNames array is in alphabetical order. + for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) { + if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0) + llvm_unreachable("TargetLibraryInfo function names must be sorted"); + } +#endif // !NDEBUG // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later. 
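// The alphabetical order enforced above is load-bearing: getLibFunc, added
// later in this file, binary-searches StandardNames with std::lower_bound,
// so an out-of-order entry would make some library functions
// unrecognizable. Usage sketch:
//   LibFunc::Func F;
//   if (TLI.getLibFunc("memcpy", F))
//     ; // F now holds LibFunc::memcpy, the index of the table hit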
if (T.isMacOSX()) { @@ -240,14 +262,14 @@ TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) { // Default to everything being available. memset(AvailableArray, -1, sizeof(AvailableArray)); - initialize(*this, Triple()); + initialize(*this, Triple(), StandardNames); } TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) { // Default to everything being available. memset(AvailableArray, -1, sizeof(AvailableArray)); - initialize(*this, T); + initialize(*this, T, StandardNames); } TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) @@ -256,6 +278,17 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) CustomNames = TLI.CustomNames; } +bool TargetLibraryInfo::getLibFunc(StringRef funcName, + LibFunc::Func &F) const { + const char **Start = &StandardNames[0]; + const char **End = &StandardNames[LibFunc::NumLibFuncs]; + const char **I = std::lower_bound(Start, End, funcName); + if (I != End && *I == funcName) { + F = (LibFunc::Func)(I - Start); + return true; + } + return false; +} /// disableAllFunctions - This disables all builtins, which is used for options /// like -fno-builtin. diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 95e83ec..73a0095 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -39,7 +39,9 @@ private: MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(), + bool matchingInlineAsm = false) { + if (matchingInlineAsm) return true; return Parser.Error(L, Msg, Ranges); } @@ -65,6 +67,12 @@ private: SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); + bool MatchInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + SmallVectorImpl<MCInst> &MCInsts, + unsigned &OrigErrorInfo, + bool matchingInlineAsm = false); + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. bool isSrcOp(X86Operand &Op); @@ -1508,9 +1516,24 @@ bool X86AsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { + SmallVector<MCInst, 2> Insts; + unsigned ErrorInfo; + bool Error = MatchInstruction(IDLoc, Operands, Insts, ErrorInfo); + if (!Error) + for (unsigned i = 0, e = Insts.size(); i != e; ++i) + Out.EmitInstruction(Insts[i]); + return Error; +} + +bool X86AsmParser:: +MatchInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo, + bool matchingInlineAsm) { assert(!Operands.empty() && "Unexpect empty operand list!"); X86Operand *Op = static_cast<X86Operand*>(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>(); // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. @@ -1523,7 +1546,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, MCInst Inst; Inst.setOpcode(X86::WAIT); Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); const char *Repl = StringSwitch<const char*>(Op->getToken()) @@ -1542,7 +1565,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, } bool WasOriginallyInvalidOperand = false; - unsigned OrigErrorInfo; MCInst Inst; // First, try a direct match. 
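The MatchAndEmitInstruction/MatchInstruction split above lets other clients,
evidently inline-assembly matching, run the matcher without streaming
instructions or printing diagnostics: the matched MCInsts are collected in a
vector, and Error() skips the actual report (while still signalling failure)
when matchingInlineAsm is set. A hypothetical caller inside the parser, as a
sketch:

  SmallVector<MCInst, 2> Insts;
  unsigned ErrorInfo;
  if (!MatchInstruction(IDLoc, Operands, Insts, ErrorInfo,
                        /*matchingInlineAsm=*/true)) {
    // Match succeeded; inspect Insts without emitting them.
  }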
@@ -1557,13 +1579,15 @@ MatchAndEmitInstruction(SMLoc IDLoc, ; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); return false; case Match_MissingFeature: - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + Error(IDLoc, "instruction requires a CPU feature not currently enabled", + EmptyRanges, matchingInlineAsm); return true; case Match_ConversionFail: - return Error(IDLoc, "unable to convert operands to instruction"); + return Error(IDLoc, "unable to convert operands to instruction", + EmptyRanges, matchingInlineAsm); case Match_InvalidOperand: WasOriginallyInvalidOperand = true; break; @@ -1615,7 +1639,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, (Match3 == Match_Success) + (Match4 == Match_Success); if (NumSuccessfulMatches == 1) { Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); return false; } @@ -1642,7 +1666,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, OS << "'" << Base << MatchChars[i] << "'"; } OS << ")"; - Error(IDLoc, OS.str()); + Error(IDLoc, OS.str(), EmptyRanges, matchingInlineAsm); return true; } @@ -1654,30 +1678,33 @@ MatchAndEmitInstruction(SMLoc IDLoc, (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { if (!WasOriginallyInvalidOperand) { return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", - Op->getLocRange()); + Op->getLocRange(), matchingInlineAsm); } // Recover location info for the operand if we know which was the problem. if (OrigErrorInfo != ~0U) { if (OrigErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); + return Error(IDLoc, "too few operands for instruction", + EmptyRanges, matchingInlineAsm); X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo]; if (Operand->getStartLoc().isValid()) { SMRange OperandRange = Operand->getLocRange(); return Error(Operand->getStartLoc(), "invalid operand for instruction", - OperandRange); + OperandRange, matchingInlineAsm); } } - return Error(IDLoc, "invalid operand for instruction"); + return Error(IDLoc, "invalid operand for instruction", EmptyRanges, + matchingInlineAsm); } // If one instruction matched with a missing feature, report this as a // missing feature. if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + Error(IDLoc, "instruction requires a CPU feature not currently enabled", + EmptyRanges, matchingInlineAsm); return true; } @@ -1685,12 +1712,14 @@ MatchAndEmitInstruction(SMLoc IDLoc, // operand failure. if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ - Error(IDLoc, "invalid operand for instruction"); + Error(IDLoc, "invalid operand for instruction", EmptyRanges, + matchingInlineAsm); return true; } // If all of these were an outright failure, report it in a useless way. 
- Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); + Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", + EmptyRanges, matchingInlineAsm); return true; } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 4bbfe95..5039887 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -327,7 +327,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, if (type == TYPE_RELv) { isBranch = true; pcrel = insn.startLocation + - insn.displacementOffset + insn.displacementSize; + insn.immediateOffset + insn.immediateSize; switch (insn.displacementSize) { default: break; @@ -762,8 +762,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, translateRegister(mcInst, insn.vvvv); return false; case ENCODING_DUP: - return translateOperand(mcInst, - insn.spec->operands[operand.type - TYPE_DUP0], + return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], insn, Dis); } } @@ -789,8 +788,8 @@ static bool translateInstruction(MCInst &mcInst, insn.numImmediatesTranslated = 0; for (index = 0; index < X86_MAX_OPERANDS; ++index) { - if (insn.spec->operands[index].encoding != ENCODING_NONE) { - if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) { + if (insn.operands[index].encoding != ENCODING_NONE) { + if (translateOperand(mcInst, insn.operands[index], insn, Dis)) { return true; } } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index c11f51c..0dbfa26 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -20,7 +20,7 @@ // 2. Read the opcode, and determine what kind of opcode it is. The // disassembler distinguishes four kinds of opcodes, which are enumerated in // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte -// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a +// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context. // // 3. Depending on the opcode type, look in one of four ClassDecision structures @@ -74,8 +74,8 @@ #ifndef X86DISASSEMBLER_H #define X86DISASSEMBLER_H -#define INSTRUCTION_SPECIFIER_FIELDS \ - const char* name; +#define INSTRUCTION_SPECIFIER_FIELDS \ + uint16_t operands; #define INSTRUCTION_IDS \ unsigned instructionIDs; @@ -88,7 +88,7 @@ #include "llvm/MC/MCDisassembler.h" namespace llvm { - + class MCInst; class MCInstrInfo; class MCSubtargetInfo; @@ -96,7 +96,7 @@ class MemoryObject; class raw_ostream; struct EDInstInfo; - + namespace X86Disassembler { /// X86GenericDisassembler - Generic disassembler for all X86 platforms. 
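The disassembler hunks that follow all stem from one table-compression
change visible in the header above: InstructionSpecifier no longer embeds a
per-instruction OperandSpecifier array; its uint16_t operands field now
indexes a shared x86OperandSets table. Every former access of the shape

  insn->spec->operands[index]

becomes

  x86OperandSets[insn->spec->operands][index]

and decodeInstruction caches a direct pointer in insn->operands once
decoding succeeds, so the MCInst translation step pays the indirection only
once.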
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 6020877..0c92912 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1495,14 +1495,14 @@ static int readOperands(struct InternalInstruction* insn) { needVVVV = hasVVVV && (insn->vvvv != 0); for (index = 0; index < X86_MAX_OPERANDS; ++index) { - switch (insn->spec->operands[index].encoding) { + switch (x86OperandSets[insn->spec->operands][index].encoding) { case ENCODING_NONE: break; case ENCODING_REG: case ENCODING_RM: if (readModRM(insn)) return -1; - if (fixupReg(insn, &insn->spec->operands[index])) + if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) return -1; break; case ENCODING_CB: @@ -1524,14 +1524,14 @@ static int readOperands(struct InternalInstruction* insn) { } if (readImmediate(insn, 1)) return -1; - if (insn->spec->operands[index].type == TYPE_IMM3 && + if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && insn->immediates[insn->numImmediatesConsumed - 1] > 7) return -1; - if (insn->spec->operands[index].type == TYPE_IMM5 && + if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && insn->immediates[insn->numImmediatesConsumed - 1] > 31) return -1; - if (insn->spec->operands[index].type == TYPE_XMM128 || - insn->spec->operands[index].type == TYPE_XMM256) + if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || + x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) sawRegImm = 1; break; case ENCODING_IW: @@ -1582,7 +1582,7 @@ static int readOperands(struct InternalInstruction* insn) { needVVVV = 0; /* Mark that we have found a VVVV operand. */ if (!hasVVVV) return -1; - if (fixupReg(insn, &insn->spec->operands[index])) + if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) return -1; break; case ENCODING_DUP: @@ -1644,6 +1644,8 @@ int decodeInstruction(struct InternalInstruction* insn, insn->instructionID == 0 || readOperands(insn)) return -1; + + insn->operands = &x86OperandSets[insn->spec->operands][0]; insn->length = insn->readerCursor - insn->startLocation; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index e2caf6a..797703f 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -19,17 +19,18 @@ #ifdef __cplusplus extern "C" { #endif - -#define INSTRUCTION_SPECIFIER_FIELDS + +#define INSTRUCTION_SPECIFIER_FIELDS \ + uint16_t operands; #define INSTRUCTION_IDS \ unsigned instructionIDs; #include "X86DisassemblerDecoderCommon.h" - + #undef INSTRUCTION_SPECIFIER_FIELDS #undef INSTRUCTION_IDS - + /* * Accessor functions for various fields of an Intel instruction */ @@ -43,7 +44,7 @@ extern "C" { #define rFromREX(rex) (((rex) & 0x4) >> 2) #define xFromREX(rex) (((rex) & 0x2) >> 1) #define bFromREX(rex) ((rex) & 0x1) - + #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) @@ -237,7 +238,7 @@ extern "C" { ENTRY(YMM13) \ ENTRY(YMM14) \ ENTRY(YMM15) - + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -245,7 +246,7 @@ extern "C" { ENTRY(DS) \ ENTRY(FS) \ ENTRY(GS) - + #define REGS_DEBUG \ ENTRY(DR0) \ ENTRY(DR1) \ @@ -266,12 +267,12 @@ extern "C" { ENTRY(CR6) \ ENTRY(CR7) \ ENTRY(CR8) - + #define ALL_EA_BASES \ EA_BASES_16BIT \ EA_BASES_32BIT \ 
EA_BASES_64BIT - + #define ALL_SIB_BASES \ REGS_32BIT \ REGS_64BIT @@ -290,7 +291,7 @@ extern "C" { ENTRY(RIP) /* - * EABase - All possible values of the base field for effective-address + * EABase - All possible values of the base field for effective-address * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We * distinguish between bases (EA_BASE_*) and registers that just happen to be * referred to when Mod == 0b11 (EA_REG_*). @@ -305,8 +306,8 @@ typedef enum { #undef ENTRY EA_max } EABase; - -/* + +/* * SIBIndex - All possible values of the SIB index field. * Borrows entries from ALL_EA_BASES with the special case that * sib is synonymous with NONE. @@ -321,7 +322,7 @@ typedef enum { #undef ENTRY SIB_INDEX_max } SIBIndex; - + /* * SIBBase - All possible values of the SIB base field. */ @@ -353,7 +354,7 @@ typedef enum { #undef ENTRY MODRM_REG_max } Reg; - + /* * SegmentOverride - All possible segment overrides. */ @@ -367,7 +368,7 @@ typedef enum { SEG_OVERRIDE_GS, SEG_OVERRIDE_max } SegmentOverride; - + /* * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field */ @@ -431,16 +432,16 @@ struct InternalInstruction { void* dlogArg; /* General instruction information */ - + /* The mode to disassemble for (64-bit, protected, real) */ DisassemblerMode mode; /* The start of the instruction, usable with the reader */ uint64_t startLocation; /* The length of the instruction, in bytes */ size_t length; - + /* Prefix state */ - + /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ uint8_t prefixPresent[0x100]; /* contains the location (for use with the reader) of the prefix byte */ @@ -456,7 +457,7 @@ struct InternalInstruction { uint64_t necessaryPrefixLocation; /* The segment override type */ SegmentOverride segmentOverride; - + /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; uint8_t addressSize; @@ -467,9 +468,9 @@ struct InternalInstruction { needed to find relocation entries for adding symbolic operands */ uint8_t displacementOffset; uint8_t immediateOffset; - + /* opcode state */ - + /* The value of the two-byte escape prefix (usually 0x0f) */ uint8_t twoByteEscape; /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */ @@ -478,16 +479,16 @@ struct InternalInstruction { uint8_t opcode; /* The ModR/M byte of the instruction, if it is an opcode extension */ uint8_t modRMExtension; - + /* decode state */ - + /* The type of opcode, used for indexing into the array of decode tables */ OpcodeType opcodeType; /* The instruction ID, extracted from the decode table */ uint16_t instructionID; /* The specifier for the instruction, from the instruction info table */ const struct InstructionSpecifier *spec; - + /* state for additional bytes, consumed during operand decode. Pattern: consumed___ indicates that the byte was already consumed and does not need to be consumed again */ @@ -495,12 +496,12 @@ struct InternalInstruction { /* The VEX.vvvv field, which contains a third register operand for some AVX instructions */ Reg vvvv; - + /* The ModR/M byte, which contains most register operands and some portion of all memory operands */ BOOL consumedModRM; uint8_t modRM; - + /* The SIB byte, used for more complex 32- or 64-bit memory operands */ BOOL consumedSIB; uint8_t sib; @@ -508,19 +509,19 @@ struct InternalInstruction { /* The displacement, used for memory operands */ BOOL consumedDisplacement; int32_t displacement; - + /* Immediates. 
There can be two in some cases */ uint8_t numImmediatesConsumed; uint8_t numImmediatesTranslated; uint64_t immediates[2]; - + /* A register or immediate operand encoded into the opcode */ BOOL consumedOpcodeModifier; uint8_t opcodeModifier; Reg opcodeRegister; - + /* Portions of the ModR/M byte */ - + /* These fields determine the allowable values for the ModR/M fields, which depend on operand and address widths */ EABase eaBaseBase; @@ -533,11 +534,13 @@ struct InternalInstruction { EADisplacement eaDisplacement; /* The reg field always encodes a register */ Reg reg; - + /* SIB state */ SIBIndex sibIndex; uint8_t sibScale; SIBBase sibBase; + + const struct OperandSpecifier *operands; }; /* decodeInstruction - Decode one instruction and store the decoding results in @@ -571,15 +574,15 @@ int decodeInstruction(struct InternalInstruction* insn, * @param line - The line number that printed the debug message. * @param s - The message to print. */ - + void x86DisassemblerDebug(const char *file, unsigned line, const char *s); const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); -#ifdef __cplusplus +#ifdef __cplusplus } #endif - + #endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 13e1136..b0a0e1e 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -119,7 +119,7 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") -#define ENUM_ENTRY(n, r, d) n, +#define ENUM_ENTRY(n, r, d) n, typedef enum { INSTRUCTION_CONTEXTS IC_max @@ -148,11 +148,11 @@ typedef enum { * If a ModR/M byte is not required, "required" is left unset, and the values * for each instructionID are identical. */ - + typedef uint16_t InstrUID; /* - * ModRMDecisionType - describes the type of ModR/M decision, allowing the + * ModRMDecisionType - describes the type of ModR/M decision, allowing the * consumer to determine the number of entries in it. * * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded @@ -172,7 +172,7 @@ typedef uint16_t InstrUID; ENUM_ENTRY(MODRM_SPLITREG) \ ENUM_ENTRY(MODRM_FULL) -#define ENUM_ENTRY(n) n, +#define ENUM_ENTRY(n) n, typedef enum { MODRMTYPES MODRM_max @@ -180,13 +180,13 @@ typedef enum { #undef ENUM_ENTRY /* - * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which + * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which * instruction each possible value of the ModR/M byte corresponds to. Once * this information is known, we have narrowed down to a single instruction. */ struct ModRMDecision { uint8_t modrm_type; - + /* The macro below must be defined wherever this file is included. */ INSTRUCTION_IDS }; @@ -210,7 +210,7 @@ struct ContextDecision { struct OpcodeDecision opcodeDecisions[IC_max]; }; -/* +/* * Physical encodings of instruction operands. */ @@ -244,14 +244,14 @@ struct ContextDecision { ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ "in type") -#define ENUM_ENTRY(n, d) n, +#define ENUM_ENTRY(n, d) n, typedef enum { ENCODINGS ENCODING_max } OperandEncoding; #undef ENUM_ENTRY -/* +/* * Semantic interpretations of instruction operands. 
*/ @@ -332,14 +332,14 @@ struct ContextDecision { ENUM_ENTRY(TYPE_DUP4, "operand 4") \ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state") -#define ENUM_ENTRY(n, d) n, +#define ENUM_ENTRY(n, d) n, typedef enum { TYPES TYPE_max } OperandType; #undef ENUM_ENTRY -/* +/* * OperandSpecifier - The specification for how to extract and interpret one * operand. */ @@ -374,8 +374,7 @@ typedef enum { struct InstructionSpecifier { uint8_t modifierType; uint8_t modifierBase; - struct OperandSpecifier operands[X86_MAX_OPERANDS]; - + /* The macro below must be defined wherever this file is included. */ INSTRUCTION_SPECIFIER_FIELDS }; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 49c07f3..b0acd7d 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -91,9 +91,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; - // OpenBSD has buggy support for .quad in 32-bit mode, just split into two - // .words. - if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86) + // OpenBSD and Bitrig have buggy support for .quad in 32-bit mode, just split + // into two .words. + if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) && + T.getArch() == Triple::x86) Data64bitsDirective = 0; } diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index bf05ccf..dce5b4d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -26,7 +26,7 @@ class FunctionPass; class JITCodeEmitter; class X86TargetMachine; -/// createX86ISelDag - This pass converts a legalized DAG into a +/// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *createX86ISelDag(X86TargetMachine &TM, diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6c1a816..18e6b7c 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -17,14 +17,14 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// X86 Subtarget state. +// X86 Subtarget state // def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", "64-bit mode (x86_64)">; //===----------------------------------------------------------------------===// -// X86 Subtarget features. 
+// X86 Subtarget features //===----------------------------------------------------------------------===// def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", @@ -97,7 +97,7 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", [FeatureAVX, FeatureSSE4A]>; def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions", - [FeatureAVX, FeatureSSE4A]>; + [FeatureFMA4]>; def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "HasVectorUAMem", "true", "Allow unaligned memory operands on vector/SIMD instructions">; @@ -226,7 +226,7 @@ def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI]>; + FeaturePOPCNT, FeatureBMI, FeatureFMA]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index a6ed9ba..35386cd 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -37,15 +37,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { virtual const char *getPassName() const { return "X86 AT&T-Style Assembly Printer"; } - + const X86Subtarget &getSubtarget() const { return *Subtarget; } virtual void EmitStartOfAsmFile(Module &M); virtual void EmitEndOfAsmFile(Module &M); - + virtual void EmitInstruction(const MachineInstr *MI); - + void printSymbolOperand(const MachineOperand &MO, raw_ostream &O); // These methods are used by the tablegen'erated instruction printer. @@ -71,7 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O); bool runOnMachineFunction(MachineFunction &F); - + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); MachineLocation getDebugValueLocation(const MachineInstr *MI) const; diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index e01ff41..6a6125b 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -17,4 +17,3 @@ using namespace llvm; X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } - diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0cec95a..471eb31 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -1,4 +1,4 @@ -//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// +//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -33,7 +33,7 @@ public: void addExternalFunction(MCSymbol* Symbol) { Externals.insert(Symbol); } - + typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator; externals_iterator externals_begin() const { return Externals.begin(); } externals_iterator externals_end() const { return Externals.end(); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 585b7a5..e5952aa 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -57,7 +57,9 @@ class X86FastISel : public FastISel { bool X86ScalarSSEf32; public: - explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) { + explicit X86FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) + : FastISel(funcInfo, libInfo) { 
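// The extra parameter threads TargetLibraryInfo into FastISel alongside
// FunctionLoweringInfo; the createFastISel factory at the end of this file
// forwards both, so the selector is now constructed as, e.g.:
//   FastISel *FIS = X86::createFastISel(FuncInfo, LibInfo);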
Subtarget = &TM.getSubtarget<X86Subtarget>(); StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; X86ScalarSSEf64 = Subtarget->hasSSE2(); @@ -155,9 +157,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { // For now, require SSE/SSE2 for performing floating-point operations, // since x87 requires additional work. if (VT == MVT::f64 && !X86ScalarSSEf64) - return false; + return false; if (VT == MVT::f32 && !X86ScalarSSEf32) - return false; + return false; // Similarly, no f80 support yet. if (VT == MVT::f80) return false; @@ -1516,6 +1518,22 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { return DoSelectCall(I, 0); } +static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget, + const ImmutableCallSite &CS) { + if (Subtarget.is64Bit()) + return 0; + if (Subtarget.isTargetWindows()) + return 0; + CallingConv::ID CC = CS.getCallingConv(); + if (CC == CallingConv::Fast || CC == CallingConv::GHC) + return 0; + if (!CS.paramHasAttr(1, Attribute::StructRet)) + return 0; + if (CS.paramHasAttr(1, Attribute::InReg)) + return 0; + return 4; +} + // Select either a call, or an llvm.memcpy/memmove/memset intrinsic bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { const CallInst *CI = cast<CallInst>(I); @@ -1862,12 +1880,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - unsigned NumBytesCallee = 0; - if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() && - !(CS.getCallingConv() == CallingConv::Fast || - CS.getCallingConv() == CallingConv::GHC) && - CS.paramHasAttr(1, Attribute::StructRet)) - NumBytesCallee = 4; + const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) .addImm(NumBytes).addImm(NumBytesCallee); @@ -2129,28 +2142,28 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { unsigned Opc = 0; const TargetRegisterClass *RC = NULL; switch (VT.SimpleTy) { - default: return false; - case MVT::f32: - if (X86ScalarSSEf32) { - Opc = X86::FsFLD0SS; - RC = &X86::FR32RegClass; - } else { - Opc = X86::LD_Fp032; - RC = &X86::RFP32RegClass; - } - break; - case MVT::f64: - if (X86ScalarSSEf64) { - Opc = X86::FsFLD0SD; - RC = &X86::FR64RegClass; - } else { - Opc = X86::LD_Fp064; - RC = &X86::RFP64RegClass; - } - break; - case MVT::f80: - // No f80 support yet. - return false; + default: return false; + case MVT::f32: + if (X86ScalarSSEf32) { + Opc = X86::FsFLD0SS; + RC = &X86::FR32RegClass; + } else { + Opc = X86::LD_Fp032; + RC = &X86::RFP32RegClass; + } + break; + case MVT::f64: + if (X86ScalarSSEf64) { + Opc = X86::FsFLD0SD; + RC = &X86::FR64RegClass; + } else { + Opc = X86::LD_Fp064; + RC = &X86::RFP64RegClass; + } + break; + case MVT::f80: + // No f80 support yet. 
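// Reading the re-indented switch above: with SSE available, +0.0 is
// materialized through the FsFLD0SS/FsFLD0SD pseudos (later expanded to a
// register-zeroing idiom), without SSE it falls back to the x87
// LD_Fp032/LD_Fp064 constant loads, and f80 is rejected outright: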
+ return false; } unsigned ResultReg = createResultReg(RC); @@ -2169,7 +2182,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, if (!X86SelectAddress(LI->getOperand(0), AM)) return false; - X86InstrInfo &XII = (X86InstrInfo&)TII; + const X86InstrInfo &XII = (const X86InstrInfo&)TII; unsigned Size = TD.getTypeAllocSize(LI->getType()); unsigned Alignment = LI->getAlignment(); @@ -2188,7 +2201,8 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, namespace llvm { - FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { - return new X86FastISel(funcInfo); + FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { + return new X86FastISel(funcInfo, libInfo); } } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 711ee41..955c75a 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -971,7 +971,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { // Change from the pseudo instruction to the concrete instruction. MI->RemoveOperand(0); // Remove the explicit ST(0) operand MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); - + // Result gets pushed on the stack. pushReg(DestReg); } @@ -1015,7 +1015,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { } else { moveToTop(Reg, I); // Move to the top of the stack... } - + // Convert from the pseudo instruction to the concrete instruction. MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); @@ -1297,7 +1297,7 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { MI->RemoveOperand(1); MI->getOperand(0).setReg(getSTReg(Op1)); MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); - + // If we kill the second operand, make sure to pop it from the stack. if (Op0 != Op1 && KillsOp1) { // Get this value off of the register stack. @@ -1714,38 +1714,38 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // Assert that the top of stack contains the right FP register. assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && "Top of stack not the right register for RET!"); - + // Ok, everything is good, mark the value as not being on the stack // anymore so that our assertion about the stack being empty at end of // block doesn't fire. StackTop = 0; return; } - + // Otherwise, we are returning two values: // 2) If returning the same value for both, we only have one thing in the FP // stack. Consider: RET FP1, FP1 if (StackTop == 1) { assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& "Stack misconfiguration for RET!"); - + // Duplicate the TOS so that we return it twice. Just pick some other FPx // register to hold it. unsigned NewReg = getScratchReg(); duplicateToTop(FirstFPRegOp, NewReg, MI); FirstFPRegOp = NewReg; } - + /// Okay we know we have two different FPx operands now: assert(StackTop == 2 && "Must have two values live!"); - + /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently /// in ST(1). In this case, emit an fxch. if (getStackEntry(0) == SecondFPRegOp) { assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); moveToTop(FirstFPRegOp, MI); } - + /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in /// ST(1). Just remove both from our understanding of the stack and return. 
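The RET handling in the floating-point stackifier above moves values with FXCH rather than copies, because the x87 registers form a stack. A toy model of the state it tracks, assuming hypothetical names:

#include <algorithm>
#include <cassert>

struct FPStackModel {
  unsigned Slots[8];   // Slots[0] models ST(0)
  unsigned Depth = 0;

  // Bring slot i to the top; corresponds to emitting FXCH ST(i).
  void moveToTop(unsigned i) {
    assert(i < Depth && "stack slot out of range");
    if (i != 0)
      std::swap(Slots[0], Slots[i]);
  }
};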
assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 5186482..27195b4 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -60,7 +60,7 @@ namespace { int Base_FrameIndex; unsigned Scale; - SDValue IndexReg; + SDValue IndexReg; int32_t Disp; SDValue Segment; const GlobalValue *GV; @@ -80,11 +80,11 @@ namespace { bool hasSymbolicDisplacement() const { return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0; } - + bool hasBaseOrIndexReg() const { return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; } - + /// isRIPRelative - Return true if this addressing mode is already RIP /// relative. bool isRIPRelative() const { @@ -94,7 +94,7 @@ namespace { return RegNode->getReg() == X86::RIP; return false; } - + void setBaseReg(SDValue Reg) { BaseType = RegBase; Base_Reg = Reg; @@ -104,7 +104,7 @@ namespace { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; if (Base_Reg.getNode() != 0) - Base_Reg.getNode()->dump(); + Base_Reg.getNode()->dump(); else dbgs() << "nul"; dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' @@ -113,7 +113,7 @@ namespace { if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); else - dbgs() << "nul"; + dbgs() << "nul"; dbgs() << " Disp " << Disp << '\n' << "GV "; if (GV) @@ -213,21 +213,21 @@ namespace { SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &NodeWithChain); - + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); - + void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); - inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, + inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? @@ -426,7 +426,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); - + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. @@ -462,7 +462,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { ++NumLoadMoved; continue; } - + // Lower fpround and fpextend nodes that target the FP stack to be store and // load to the stack. This is a gross hack. We would like to simply mark // these as being illegal, but when we do that, legalize produces these when @@ -473,7 +473,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; - + EVT SrcVT = N->getOperand(0).getValueType(); EVT DstVT = N->getValueType(0); @@ -496,7 +496,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (N->getConstantOperandVal(1)) continue; } - + // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. 
Based on this, decide what we want to do. @@ -505,10 +505,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() { MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. else MemVT = SrcIsSSE ? SrcVT : DstVT; - + SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); DebugLoc dl = N->getDebugLoc(); - + // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), @@ -524,12 +524,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // To avoid invalidating 'I', back it up to the convert node. --I; CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); - + // Now that we did that, the node is dead. Increment the iterator to the // next node to process, then delete N. ++I; CurDAG->DeleteNode(N); - } + } } @@ -584,7 +584,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); - + // load gs:0 -> GS segment register. // load fs:0 -> FS segment register. // @@ -593,7 +593,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ // For more information see http://people.redhat.com/drepper/tls.pdf if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && - Subtarget->isTargetELF()) + Subtarget->isTargetLinux()) switch (N->getPointerInfo().getAddrSpace()) { case 256: AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); @@ -602,7 +602,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); return false; } - + return true; } @@ -992,7 +992,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::SHL: if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; - + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { unsigned Val = CN->getZExtValue(); @@ -1167,7 +1167,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) return false; AM = Backup; - + // Try again after commuting the operands. if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&& !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1)) @@ -1203,7 +1203,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM = Backup; } break; - + case ISD::AND: { // Perform some heroic transforms on an and of a constant-count shift // with a constant to enable use of the scaled offset field. @@ -1275,7 +1275,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; - + if (Parent && // This list of opcodes are all the nodes that have an "addr:$ptr" operand // that are not a MemSDNode, and thus don't have proper addrspace info. @@ -1290,7 +1290,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (AddrSpace == 257) AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); } - + if (MatchAddress(N, AM)) return false; @@ -1336,7 +1336,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, // elements. This is a vector shuffle from the zero vector. if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && // Check to see if the top elements are all zeros (or bitcast of zeros). 
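The ISD::SHL case in MatchAddressRecursively above relies on a shift-to-scale equivalence. A one-function sketch of that mapping (the helper name is hypothetical):

// A left shift by 1, 2 or 3 equals a SIB scale of 2, 4 or 8, so
// `base + (idx << 2)` folds into a single addressing mode.
unsigned shlAmountToScale(unsigned ShAmt) {
  return (ShAmt >= 1 && ShAmt <= 3) ? (1u << ShAmt) : 0;  // 0 = not foldable
}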
- N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && + N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && N.getOperand(0).getOperand(0).hasOneUse() && @@ -1411,7 +1411,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, // If it isn't worth using an LEA, reject it. if (Complexity <= 2) return false; - + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1422,7 +1422,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); - + X86ISelAddressMode AM; AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); @@ -1435,7 +1435,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, } else { AM.IndexReg = CurDAG->getRegister(0, MVT::i64); } - + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1449,7 +1449,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, !IsProfitableToFold(N, P, P) || !IsLegalToFold(N, P, P, OptLevel)) return false; - + return SelectAddr(N.getNode(), N.getOperand(1), Base, Scale, Index, Disp, Segment); } @@ -1700,7 +1700,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { if (Node->hasAnyUseOfValue(0)) return 0; - + // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" // version of the arithmetic instruction. @@ -1727,14 +1727,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { default: return 0; } - + bool isCN = false; ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { isCN = true; Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); } - + unsigned Opc = 0; switch (NVT.getSimpleVT().SimpleTy) { default: return 0; @@ -1772,7 +1772,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { } break; } - + assert(Opc != 0 && "Invalid arith lock transform!"); DebugLoc dl = Node->getDebugLoc(); @@ -1852,7 +1852,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) { /// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode /// is suitable for doing the {load; increment or decrement; store} to modify /// transformation. -static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, +static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, SDValue StoredVal, SelectionDAG *CurDAG, LoadSDNode* &LoadNode, SDValue &InputChain) { @@ -1876,15 +1876,15 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, // Return LoadNode by reference. LoadNode = cast<LoadSDNode>(Load); // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) - EVT LdVT = LoadNode->getMemoryVT(); - if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && + EVT LdVT = LoadNode->getMemoryVT(); + if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && LdVT != MVT::i8) return false; // Is store the only read of the loaded value? if (!Load.hasOneUse()) return false; - + // Is the address of the store the same as the load? 
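SelectAtomicLoadArith above rewrites atomic read-modify-write nodes whose fetched value is unused. At the source level the pattern looks like this minimal sketch:

#include <atomic>

// With the result discarded, the RMW can be selected as a single
// lock-prefixed instruction, e.g. `lock orl $4, (mem)`, instead of a
// sequence that materializes the old value.
void setFlag(std::atomic<unsigned> &Flags) {
  Flags.fetch_or(0x4u, std::memory_order_seq_cst);
}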
if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || LoadNode->getOffset() != StoreNode->getOffset()) @@ -1990,7 +1990,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); - + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { @@ -2062,7 +2062,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSWAP64_DAG: { unsigned Opc; switch (Opcode) { - default: llvm_unreachable("Impossible intrinsic"); + default: llvm_unreachable("Impossible opcode"); case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break; case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break; case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break; @@ -2119,7 +2119,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val) break; - unsigned ShlOp, Op = 0; + unsigned ShlOp, Op; EVT CstVT = NVT; // Check the minimum bitwidth for the new constant. @@ -2142,6 +2142,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ShlOp = X86::SHL32ri; switch (Opcode) { + default: llvm_unreachable("Impossible opcode"); case ISD::AND: Op = X86::AND32ri8; break; case ISD::OR: Op = X86::OR32ri8; break; case ISD::XOR: Op = X86::XOR32ri8; break; @@ -2152,6 +2153,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ShlOp = X86::SHL64ri; switch (Opcode) { + default: llvm_unreachable("Impossible opcode"); case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break; case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break; case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break; @@ -2168,7 +2170,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::UMUL: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - + unsigned LoReg; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); @@ -2177,20 +2179,20 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break; } - + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, N0, SDValue()).getValue(1); - + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); - + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); return NULL; } - + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue N0 = Node->getOperand(0); @@ -2287,7 +2289,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - + return NULL; } @@ -2438,7 +2440,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return NULL; } - case X86ISD::CMP: { + case X86ISD::CMP: + case X86ISD::SUB: { + // Sometimes a SUB is used to perform comparison. + if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0)) + // This node is not a CMP. + break; SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); @@ -2555,7 +2562,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // a simple increment or decrement through memory of that value, if the // uses of the modified value and its address are suitable. 
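The checks above implement the {load; increment or decrement; store} match. Its C++ shape, as a sketch:

// Load and store hit the same address and the loaded value has no other
// use, so the whole sequence can fold into a memory-operand INC/DEC.
void bump(long *Counter) {
  *Counter += 1;  // candidate for INC64m once the checks above pass
}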
// The DEC64m tablegen pattern is currently not able to match the case where - // the EFLAGS on the original DEC are used. (This also applies to + // the EFLAGS on the original DEC are used. (This also applies to // {INC,DEC}X{64,32,16,8}.) // We'll need to improve tablegen to allow flags to be transferred from a // node in the pattern to the result node. probably with a new keyword @@ -2587,7 +2594,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MemOp[0] = StoreNode->getMemOperand(); MemOp[1] = LoadNode->getMemOperand(); const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; - EVT LdVT = LoadNode->getMemoryVT(); + EVT LdVT = LoadNode->getMemoryVT(); unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), @@ -2600,6 +2607,85 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPESTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + SDValue N3 = Node->getOperand(3); + SDValue N4 = Node->getOperand(4); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, + X86::EAX, N1, SDValue()).getValue(1); + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, + N3, InFlag).getValue(1); + + SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; + unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : + X86::PCMPESTRIrr; + InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPISTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; + unsigned Opc = Subtarget->hasAVX() ? 
X86::VPCMPISTRIrr : + X86::PCMPISTRIrr; + SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } } SDNode *ResNode = SelectCode(Node); @@ -2627,7 +2713,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return true; break; } - + OutOps.push_back(Op0); OutOps.push_back(Op1); OutOps.push_back(Op2); @@ -2636,7 +2722,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } -/// createX86ISelDag - This pass converts a legalized DAG into a +/// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b88f2fa..7954170 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -66,7 +66,7 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, DebugLoc dl) { EVT VT = Vec.getValueType(); - assert(VT.getSizeInBits() == 256 && "Unexpected vector size!"); + assert(VT.is256BitVector() && "Unexpected vector size!"); EVT ElVT = VT.getVectorElementType(); unsigned Factor = VT.getSizeInBits()/128; EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, @@ -105,7 +105,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, return Result; EVT VT = Vec.getValueType(); - assert(VT.getSizeInBits() == 128 && "Unexpected vector size!"); + assert(VT.is128BitVector() && "Unexpected vector size!"); EVT ElVT = VT.getVectorElementType(); EVT ResultVT = Result.getValueType(); @@ -174,7 +174,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // For 64-bit since we have so many registers use the ILP scheduler, for // 32-bit code use the register pressure specific scheduling. // For Atom, always use ILP scheduling. 
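The PCMPESTRI/PCMPISTRI cases above are selected by hand because the instructions define both ECX (the index) and EFLAGS, two implicit defs that TableGen patterns cannot yet express; the explicit-length form also reads EAX/EDX. A source-level sketch of what this enables (requires SSE4.2):

#include <nmmintrin.h>  // compile with -msse4.2

// Returns the index of the first occurrence of Needle inside Hay;
// the lengths travel through EAX/EDX and the result comes back in ECX.
int findNeedle(__m128i Needle, int NeedleLen, __m128i Hay, int HayLen) {
  return _mm_cmpestri(Needle, NeedleLen, Hay, HayLen,
                      _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ORDERED);
}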
- if (Subtarget->isAtom()) + if (Subtarget->isAtom()) setSchedulingPreference(Sched::ILP); else if (Subtarget->is64Bit()) setSchedulingPreference(Sched::ILP); @@ -731,6 +731,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); @@ -828,7 +829,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); - setOperationAction(ISD::SETCC, MVT::v4f32, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) { @@ -869,27 +869,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); - // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - EVT VT = (MVT::SimpleValueType)i; + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; // Do not attempt to custom lower non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, - VT.getSimpleVT().SimpleTy, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, - VT.getSimpleVT().SimpleTy, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, - VT.getSimpleVT().SimpleTy, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); @@ -906,23 +897,22 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
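The hasFMA block above makes ISD::FMA Custom for scalar and vector FP types. What that ultimately enables at the source level, as a minimal sketch (whether fusion fires also depends on FP contraction options):

#include <cmath>

// Lowers to a single VFMADD* on FMA-capable parts once ISD::FMA is handled.
double fused(double a, double b, double c) {
  return std::fma(a, b, c);
}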
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to promote non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::AND, SVT, Promote); - AddPromotedToType (ISD::AND, SVT, MVT::v2i64); - setOperationAction(ISD::OR, SVT, Promote); - AddPromotedToType (ISD::OR, SVT, MVT::v2i64); - setOperationAction(ISD::XOR, SVT, Promote); - AddPromotedToType (ISD::XOR, SVT, MVT::v2i64); - setOperationAction(ISD::LOAD, SVT, Promote); - AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64); - setOperationAction(ISD::SELECT, SVT, Promote); - AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v2i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v2i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v2i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -1009,9 +999,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE42()) - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) { addRegisterClass(MVT::v32i8, &X86::VR256RegClass); addRegisterClass(MVT::v16i16, &X86::VR256RegClass); @@ -1042,13 +1029,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom); - setOperationAction(ISD::SRL, MVT::v16i16, Custom); setOperationAction(ISD::SRL, MVT::v32i8, Custom); @@ -1072,6 +1052,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + if (Subtarget->hasFMA()) { + setOperationAction(ISD::FMA, MVT::v8f32, Custom); + setOperationAction(ISD::FMA, MVT::v4f64, Custom); + setOperationAction(ISD::FMA, MVT::v4f32, Custom); + setOperationAction(ISD::FMA, MVT::v2f64, Custom); + setOperationAction(ISD::FMA, MVT::f32, Custom); + setOperationAction(ISD::FMA, MVT::f64, Custom); + } + if (Subtarget->hasAVX2()) { setOperationAction(ISD::ADD, MVT::v4i64, Legal); setOperationAction(ISD::ADD, MVT::v8i32, Legal); @@ -1125,45 +1114,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Custom lower several nodes for 256-bit types. for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + MVT VT = (MVT::SimpleValueType)i; // Extract subvector is special because the value type // (result) is 128-bit but the source is 256-bit wide. 
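The promotion loop above funnels v16i8/v8i16/v4i32 logical operations through v2i64. That is sound because bitwise operations ignore element boundaries; a scalar analogue of the bitcast-and-operate idea:

#include <cstdint>
#include <cstring>

uint64_t andAsWord(const uint8_t A[8], const uint8_t B[8]) {
  uint64_t a, b;
  std::memcpy(&a, A, sizeof a);  // "bitcast" eight i8 lanes to one word
  std::memcpy(&b, B, sizeof b);
  return a & b;                  // per-byte AND and per-word AND agree
}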
if (VT.is128BitVector()) - setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Do not attempt to custom lower other non-256-bit vectors if (!VT.is256BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, SVT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); } // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. for (int i = MVT::v32i8; i != MVT::v4i64; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to promote non-256-bit vectors if (!VT.is256BitVector()) continue; - setOperationAction(ISD::AND, SVT, Promote); - AddPromotedToType (ISD::AND, SVT, MVT::v4i64); - setOperationAction(ISD::OR, SVT, Promote); - AddPromotedToType (ISD::OR, SVT, MVT::v4i64); - setOperationAction(ISD::XOR, SVT, Promote); - AddPromotedToType (ISD::XOR, SVT, MVT::v4i64); - setOperationAction(ISD::LOAD, SVT, Promote); - AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64); - setOperationAction(ISD::SELECT, SVT, Promote); - AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v4i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v4i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v4i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); } } @@ -1221,6 +1209,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::FMA); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); @@ -1718,21 +1707,37 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, /// CallIsStructReturn - Determines whether a call uses struct return /// semantics. -static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) { +enum StructReturnType { + NotStructReturn, + RegStructReturn, + StackStructReturn +}; +static StructReturnType +callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) { if (Outs.empty()) - return false; + return NotStructReturn; - return Outs[0].Flags.isSRet(); + const ISD::ArgFlagsTy &Flags = Outs[0].Flags; + if (!Flags.isSRet()) + return NotStructReturn; + if (Flags.isInReg()) + return RegStructReturn; + return StackStructReturn; } /// ArgsAreStructReturn - Determines whether a function uses struct /// return semantics. 
-static bool -ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { +static StructReturnType +argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { if (Ins.empty()) - return false; + return NotStructReturn; - return Ins[0].Flags.isSRet(); + const ISD::ArgFlagsTy &Flags = Ins[0].Flags; + if (!Flags.isSRet()) + return NotStructReturn; + if (Flags.isInReg()) + return RegStructReturn; + return StackStructReturn; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified @@ -1876,9 +1881,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 256) + else if (RegVT.is256BitVector()) RC = &X86::VR256RegClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) + else if (RegVT.is128BitVector()) RC = &X86::VR128RegClass; else if (RegVT == MVT::x86mmx) RC = &X86::VR64RegClass; @@ -2073,7 +2078,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && - ArgsAreStructReturn(Ins)) + argsAreStructReturn(Ins) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } @@ -2163,7 +2168,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); bool IsWindows = Subtarget->isTargetWindows(); - bool IsStructRet = CallIsStructReturn(Outs); + StructReturnType SR = callIsStructReturn(Outs); bool IsSibcall = false; if (MF.getTarget().Options.DisableTailCalls) @@ -2172,8 +2177,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, OutVals, Ins, DAG); + isVarArg, SR != NotStructReturn, + MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes. @@ -2255,7 +2261,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); break; case CCValAssign::AExt: - if (RegVT.isVector() && RegVT.getSizeInBits() == 128) { + if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); @@ -2549,7 +2555,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, getTargetMachine().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && - IsStructRet) + SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. 
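The three-way classification above replaces a boolean: only a stack-passed sret pointer makes the callee pop it on return. Condensed into a standalone sketch (a hypothetical mirror, not the LLVM API):

enum StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn };

StructReturnType classify(bool HasSRet, bool SRetInReg) {
  if (!HasSRet)
    return NotStructReturn;
  return SRetInReg ? RegStructReturn : StackStructReturn;
}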
@@ -2870,8 +2876,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } FastISel * -X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { - return X86::createFastISel(funcInfo); +X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return X86::createFastISel(funcInfo, libInfo); } @@ -3397,11 +3404,11 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX, /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems != 4) return false; @@ -3416,11 +3423,11 @@ static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) { /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, /// <2, 3, 2, 3> static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems != 4) return false; @@ -3433,7 +3440,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) { /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -3455,10 +3462,12 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) { /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLHPS. static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) { + if (!VT.is128BitVector()) + return false; + unsigned NumElems = VT.getVectorNumElements(); - if ((NumElems != 2 && NumElems != 4) - || VT.getSizeInBits() > 128) + if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0, e = NumElems/2; i != e; ++i) @@ -3675,7 +3684,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) { static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) { if (VT.getVectorElementType().getSizeInBits() < 32) return false; - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return false; unsigned NumElts = VT.getVectorNumElements(); @@ -3697,7 +3706,7 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) { /// The first half comes from the second half of V1 and the second half from the /// the second half of V2. static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { - if (!HasAVX || VT.getSizeInBits() != 256) + if (!HasAVX || !VT.is256BitVector()) return false; // The shuffle result is divided into half A and half B. In total the two @@ -3789,9 +3798,10 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { /// element of vector 2 and the other elements to come from vector 1 in order. 
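The shuffle-mask predicates above now reject non-128-bit types before touching the mask. For reference, the v4f32 MOVHLPS pattern they test for, as a sketch with a hypothetical helper:

// MOVHLPS yields <V2[2], V2[3], V1[2], V1[3]>, i.e. mask <6,7,2,3>;
// negative entries mean "undef/don't care".
bool isMovHLPSMask4(const int M[4]) {
  auto Ok = [](int m, int want) { return m < 0 || m == want; };
  return Ok(M[0], 6) && Ok(M[1], 7) && Ok(M[2], 2) && Ok(M[3], 3);
}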
static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT, bool V2IsSplat = false, bool V2IsUndef = false) { - unsigned NumOps = VT.getVectorNumElements(); - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return false; + + unsigned NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; @@ -3857,9 +3867,11 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT, /// specifies a shuffle of elements that is suitable for input to 256-bit /// version of MOVDDUP. static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { - unsigned NumElts = VT.getVectorNumElements(); + if (!HasAVX || !VT.is256BitVector()) + return false; - if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4) + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts != 4) return false; for (unsigned i = 0; i != NumElts/2; ++i) @@ -3875,7 +3887,7 @@ static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { /// specifies a shuffle of elements that is suitable for input to 128-bit /// version of MOVDDUP. static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; unsigned e = VT.getVectorNumElements() / 2; @@ -4120,7 +4132,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; if (VT.getVectorNumElements() != 4) return false; @@ -4177,7 +4189,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) { /// MOVLP, it must be either a vector load or a scalar load to vector. static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) @@ -4719,7 +4731,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { // Although the logic below support any bitwidth size, there are no // shift instructions which handle more than 128-bit vectors. - if (SVOp->getValueType(0).getSizeInBits() > 128) + if (!SVOp->getValueType(0).is128BitVector()) return false; if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || @@ -4814,7 +4826,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { - assert(VT.getSizeInBits() == 128 && "Unknown type for VShift"); + assert(VT.is128BitVector() && "Unknown type for VShift"); EVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); @@ -5047,7 +5059,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { } } - bool Is256 = VT.getSizeInBits() == 256; + bool Is256 = VT.is256BitVector(); // Handle the broadcasting a single constant scalar from the constant pool // into a vector. 
On Sandybridge it is still better to load a constant vector @@ -5102,6 +5114,86 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { return SDValue(); } +// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64 +// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the +// constraint of matching input/output vector elements. +SDValue +X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + SDNode *N = Op.getNode(); + EVT VT = Op.getValueType(); + unsigned NumElts = Op.getNumOperands(); + + // Check supported types and sub-targets. + // + // Only v2f32 -> v2f64 needs special handling. + if (VT != MVT::v2f64 || !Subtarget->hasSSE2()) + return SDValue(); + + SDValue VecIn; + EVT VecInVT; + SmallVector<int, 8> Mask; + EVT SrcVT = MVT::Other; + + // Check the patterns could be translated into X86vfpext. + for (unsigned i = 0; i < NumElts; ++i) { + SDValue In = N->getOperand(i); + unsigned Opcode = In.getOpcode(); + + // Skip if the element is undefined. + if (Opcode == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // Quit if one of the elements is not defined from 'fpext'. + if (Opcode != ISD::FP_EXTEND) + return SDValue(); + + // Check how the source of 'fpext' is defined. + SDValue L2In = In.getOperand(0); + EVT L2InVT = L2In.getValueType(); + + // Check the original type + if (SrcVT == MVT::Other) + SrcVT = L2InVT; + else if (SrcVT != L2InVT) // Quit if non-homogenous typed. + return SDValue(); + + // Check whether the value being 'fpext'ed is extracted from the same + // source. + Opcode = L2In.getOpcode(); + + // Quit if it's not extracted with a constant index. + if (Opcode != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(L2In.getOperand(1))) + return SDValue(); + + SDValue ExtractedFromVec = L2In.getOperand(0); + + if (VecIn.getNode() == 0) { + VecIn = ExtractedFromVec; + VecInVT = ExtractedFromVec.getValueType(); + } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec. + return SDValue(); + + Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue()); + } + + // Quit if all operands of BUILD_VECTOR are undefined. + if (!VecIn.getNode()) + return SDValue(); + + // Fill the remaining mask as undef. 
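LowerVectorFpExtend above recognizes a BUILD_VECTOR whose lanes are fpext of extracts from one source vector and turns it into a single X86ISD::VFPEXT (CVTPS2PD). The source-level shape it matches, as a plain C++ analogue:

void widen2(const float In[2], double Out[2]) {
  Out[0] = static_cast<double>(In[0]);  // fpext(extractelt In, 0)
  Out[1] = static_cast<double>(In[1]);  // fpext(extractelt In, 1)
}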
+ for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i) + Mask.push_back(-1); + + return DAG.getNode(X86ISD::VFPEXT, DL, VT, + DAG.getVectorShuffle(VecInVT, DL, + VecIn, DAG.getUNDEF(VecInVT), + &Mask[0])); +} + SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -5134,6 +5226,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (Broadcast.getNode()) return Broadcast; + SDValue FpExt = LowerVectorFpExtend(Op, DAG); + if (FpExt.getNode()) + return FpExt; + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; @@ -5209,12 +5305,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, Item, DAG.getIntPtrConstant(0)); } - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); @@ -5223,11 +5319,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); } else { - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); @@ -5287,7 +5383,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and use // shuffles to put them in place. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SmallVector<SDValue, 32> V; for (unsigned i = 0; i != NumElems; ++i) V.push_back(Op.getOperand(i)); @@ -5368,7 +5464,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } - if (Values.size() > 1 && VT.getSizeInBits() == 128) { + if (Values.size() > 1 && VT.is128BitVector()) { // Check for a build vector of consecutive loads. for (unsigned i = 0; i < NumElems; ++i) V[i] = Op.getOperand(i); @@ -5429,39 +5525,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place -// them in a MMX register. This is better than doing a stack convert. 
-static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); - EVT ResVT = Op.getValueType(); - - assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || - ResVT == MVT::v8i16 || ResVT == MVT::v16i8); - int Mask[2]; - SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0)); - SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); - InVec = Op.getOperand(1); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { - unsigned NumElts = ResVT.getVectorNumElements(); - VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp, - InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1)); - } else { - InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec); - SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); - Mask[0] = 0; Mask[1] = 2; - VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask); - } - return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); -} - // LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction // to create 256-bit vectors from two other 128-bit ones. static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT ResVT = Op.getValueType(); - assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide"); + assert(ResVT.is256BitVector() && "Value type must be 256-bit wide"); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -5472,16 +5542,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { - EVT ResVT = Op.getValueType(); - assert(Op.getNumOperands() == 2); - assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) && - "Unsupported CONCAT_VECTORS for value type"); - - // We support concatenate two MMX registers and place them in a MMX register. - // This is better than doing a stack convert. - if (ResVT.is128BitVector()) - return LowerMMXCONCAT_VECTORS(Op, DAG); // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors // from two other 128-bit ones. @@ -6131,7 +6192,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { DebugLoc dl = SVOp->getDebugLoc(); EVT VT = SVOp->getValueType(0); - assert(VT.getSizeInBits() == 128 && "Unsupported vector size"); + assert(VT.is128BitVector() && "Unsupported vector size"); std::pair<int, int> Locs[4]; int Mask1[] = { -1, -1, -1, -1 }; @@ -6759,7 +6820,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Handle all 128-bit wide vectors with 4 elements, and match them with // several different shuffle types. - if (NumElems == 4 && VT.getSizeInBits() == 128) + if (NumElems == 4 && VT.is128BitVector()) return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG); // Handle general 256-bit shuffles @@ -6775,7 +6836,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(0).getValueType().getSizeInBits() != 128) + if (!Op.getOperand(0).getValueType().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { @@ -6845,7 +6906,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. 
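The 256-bit EXTRACT_VECTOR_ELT path described above first selects a 128-bit half, then re-indexes within it. The index arithmetic, as a small sketch (hypothetical helper):

unsigned halfAndIndex(unsigned Idx, unsigned NumElems, bool &UpperHalf) {
  UpperHalf = Idx >= NumElems / 2;              // which 128-bit lane
  return UpperHalf ? Idx - NumElems / 2 : Idx;  // index inside that lane
}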
- if (VecVT.getSizeInBits() == 256) { + if (VecVT.is256BitVector()) { DebugLoc dl = Op.getNode()->getDebugLoc(); unsigned NumElems = VecVT.getVectorNumElements(); SDValue Idx = Op.getOperand(1); @@ -6860,7 +6921,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, DAG.getConstant(IdxVal, MVT::i32)); } - assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); + assert(VecVT.is128BitVector() && "Unexpected vector length"); if (Subtarget->hasSSE41()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); @@ -6936,7 +6997,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return SDValue(); if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && @@ -6992,7 +7053,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { // If this is a 256-bit vector result, first extract the 128-bit vector, // insert the element into the extracted half and then place it back. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { if (!isa<ConstantSDNode>(N2)) return SDValue(); @@ -7036,7 +7097,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. - if (OpVT.getSizeInBits() > 128) { + if (!OpVT.is128BitVector()) { // Insert into a 128-bit vector. EVT VT128 = EVT::getVectorVT(*Context, OpVT.getVectorElementType(), @@ -7053,7 +7114,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - assert(OpVT.getSizeInBits() == 128 && "Expected an SSE type!"); + assert(OpVT.is128BitVector() && "Expected an SSE type!"); return DAG.getNode(ISD::BITCAST, dl, OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); } @@ -7068,8 +7129,8 @@ X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue Vec = Op.getNode()->getOperand(0); SDValue Idx = Op.getNode()->getOperand(1); - if (Op.getNode()->getValueType(0).getSizeInBits() == 128 && - Vec.getNode()->getValueType(0).getSizeInBits() == 256 && + if (Op.getNode()->getValueType(0).is128BitVector() && + Vec.getNode()->getValueType(0).is256BitVector() && isa<ConstantSDNode>(Idx)) { unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); return Extract128BitVector(Vec, IdxVal, DAG, dl); @@ -7089,8 +7150,8 @@ X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue SubVec = Op.getNode()->getOperand(1); SDValue Idx = Op.getNode()->getOperand(2); - if (Op.getNode()->getValueType(0).getSizeInBits() == 256 && - SubVec.getNode()->getValueType(0).getSizeInBits() == 128 && + if (Op.getNode()->getValueType(0).is256BitVector() && + SubVec.getNode()->getValueType(0).is128BitVector() && isa<ConstantSDNode>(Idx)) { unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl); @@ -7735,9 +7796,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U } subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 } #ifdef __SSE3__ - haddpd %xmm0, %xmm0 + haddpd %xmm0, %xmm0 #else - pshufd $0x4e, %xmm0, %xmm1 + pshufd $0x4e, %xmm0, %xmm1 addpd %xmm1, %xmm0 
#endif */ @@ -8064,7 +8125,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, EltVT = VT.getVectorElementType(); Constant *C; if (EltVT == MVT::f64) { - C = ConstantVector::getSplat(2, + C = ConstantVector::getSplat(2, ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); } else { C = ConstantVector::getSplat(4, @@ -8098,7 +8159,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo::getConstantPool(), false, false, false, 16); if (VT.isVector()) { - MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64; + MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::XOR, dl, XORVT, DAG.getNode(ISD::BITCAST, dl, XORVT, @@ -8226,7 +8287,33 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, unsigned Opcode = 0; unsigned NumOperands = 0; - switch (Op.getNode()->getOpcode()) { + + // Truncate operations may prevent the merge of the SETCC instruction + // and the arithmetic intruction before it. Attempt to truncate the operands + // of the arithmetic instruction and use a reduced bit-width instruction. + bool NeedTruncation = false; + SDValue ArithOp = Op; + if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) { + SDValue Arith = Op->getOperand(0); + // Both the trunc and the arithmetic op need to have one user each. + if (Arith->hasOneUse()) + switch (Arith.getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + NeedTruncation = true; + ArithOp = Arith; + } + } + } + + // NOTICE: In the code below we use ArithOp to hold the arithmetic operation + // which may be the result of a CAST. We use the variable 'Op', which is the + // non-casted variable when we check for possible users. + switch (ArithOp.getOpcode()) { case ISD::ADD: // Due to an isel shortcoming, be conservative if this add is likely to be // selected as part of a load-modify-store instruction. When the root node @@ -8246,7 +8333,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, goto default_case; if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { + dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. if (C->getAPIntValue() == 1) { Opcode = X86ISD::INC; @@ -8282,7 +8369,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC && - (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { + !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) { NonFlagUse = true; break; } @@ -8303,15 +8390,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, goto default_case; // Otherwise use a regular EFLAGS-setting instruction. - switch (Op.getNode()->getOpcode()) { + switch (ArithOp.getOpcode()) { default: llvm_unreachable("unexpected operator!"); - case ISD::SUB: - // If the only use of SUB is EFLAGS, use CMP instead. - if (Op.hasOneUse()) - Opcode = X86ISD::CMP; - else - Opcode = X86ISD::SUB; - break; + case ISD::SUB: Opcode = X86ISD::SUB; break; case ISD::OR: Opcode = X86ISD::OR; break; case ISD::XOR: Opcode = X86ISD::XOR; break; case ISD::AND: Opcode = X86ISD::AND; break; @@ -8332,19 +8413,40 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, break; } + // If we found that truncation is beneficial, perform the truncation and + // update 'Op'. 
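The NeedTruncation detection above targets compares performed on a truncated arithmetic result, so the operation itself can run at the narrow width and feed EFLAGS directly. The C++ shape of such code, as a sketch:

// With the rewrite, this becomes an 8-bit add plus setcc, with no
// widening and re-truncating of the intermediate value.
bool lowByteIsZero(int a, int b) {
  return static_cast<unsigned char>(a + b) == 0;
}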
+ if (NeedTruncation) { + EVT VT = Op.getValueType(); + SDValue WideVal = Op->getOperand(0); + EVT WideVT = WideVal.getValueType(); + unsigned ConvertedOp = 0; + // Use a target machine opcode to prevent further DAGCombine + // optimizations that may separate the arithmetic operations + // from the setcc node. + switch (WideVal.getOpcode()) { + default: break; + case ISD::ADD: ConvertedOp = X86ISD::ADD; break; + case ISD::SUB: ConvertedOp = X86ISD::SUB; break; + case ISD::AND: ConvertedOp = X86ISD::AND; break; + case ISD::OR: ConvertedOp = X86ISD::OR; break; + case ISD::XOR: ConvertedOp = X86ISD::XOR; break; + } + + if (ConvertedOp) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) { + SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0)); + SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1)); + Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1); + } + } + } + if (Opcode == 0) // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); - if (Opcode == X86ISD::CMP) { - SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0), - Op.getOperand(1)); - // We can't replace usage of SUB with CMP. - // The SUB node will be removed later because there is no use of it. - return SDValue(New.getNode(), 0); - } - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SmallVector<SDValue, 4> Ops; for (unsigned i = 0; i != NumOperands; ++i) @@ -8364,6 +8466,14 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return EmitTest(Op0, X86CC, DAG); DebugLoc dl = Op0.getDebugLoc(); + if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || + Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { + // Use SUB instead of CMP to enable CSE between SUB and CMP. 
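// (Illustrative note, not part of the original change.) For a pattern like
//   int f(int a, int b) { return a > b ? a - b : 0; }
// the flag-producing X86ISD::SUB emitted here and the a - b computation can
// CSE into one node: EmitCmp hands back result #1 (EFLAGS) while the select's
// true value reuses result #0 (the difference), so a single 'sub' instruction
// serves both the compare and the subtraction.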
+ SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32); + SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, + Op0, Op1); + return SDValue(Sub.getNode(), 1); + } return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } @@ -8522,7 +8632,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC && + assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); @@ -8559,10 +8669,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (isFP) { - unsigned SSECC = 8; +#ifndef NDEBUG EVT EltVT = Op0.getValueType().getVectorElementType(); - assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT; + assert(EltVT == MVT::f32 || EltVT == MVT::f64); +#endif + unsigned SSECC; bool Swap = false; // SSE Condition code mapping: @@ -8575,7 +8687,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // 6 - NLE // 7 - ORD switch (SetCCOpcode) { - default: break; + default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETOEQ: case ISD::SETEQ: SSECC = 0; break; case ISD::SETOGT: @@ -8589,34 +8701,33 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETUO: SSECC = 3; break; case ISD::SETUNE: case ISD::SETNE: SSECC = 4; break; - case ISD::SETULE: Swap = true; + case ISD::SETULE: Swap = true; // Fallthrough case ISD::SETUGE: SSECC = 5; break; - case ISD::SETULT: Swap = true; + case ISD::SETULT: Swap = true; // Fallthrough case ISD::SETUGT: SSECC = 6; break; case ISD::SETO: SSECC = 7; break; + case ISD::SETUEQ: + case ISD::SETONE: SSECC = 8; break; } if (Swap) std::swap(Op0, Op1); // In the two special cases we can't handle, emit two comparisons. if (SSECC == 8) { + unsigned CC0, CC1; + unsigned CombineOpc; if (SetCCOpcode == ISD::SETUEQ) { - SDValue UNORD, EQ; - UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(3, MVT::i8)); - EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(0, MVT::i8)); - return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ); - } - if (SetCCOpcode == ISD::SETONE) { - SDValue ORD, NEQ; - ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(7, MVT::i8)); - NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(4, MVT::i8)); - return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ); + CC0 = 3; CC1 = 0; CombineOpc = ISD::OR; + } else { + assert(SetCCOpcode == ISD::SETONE); + CC0 = 7; CC1 = 4; CombineOpc = ISD::AND; } - llvm_unreachable("Illegal FP comparison"); + + SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(CC0, MVT::i8)); + SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(CC1, MVT::i8)); + return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } // Handle all other FP comparisons here. return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, @@ -8624,17 +8735,17 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { } // Break 256-bit integer vector compare into smaller ones. - if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) + if (VT.is256BitVector() && !Subtarget->hasAVX2()) return Lower256IntVSETCC(Op, DAG); // We are handling one of the integer comparisons here. 
Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. - unsigned Opc = 0; + unsigned Opc; bool Swap = false, Invert = false, FlipSigns = false; switch (SetCCOpcode) { - default: break; + default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; case ISD::SETLT: Swap = true; @@ -8651,10 +8762,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). - if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42()) - return SDValue(); - if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41()) - return SDValue(); + if (VT == MVT::v2i64) { + if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) + return SDValue(); + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) + return SDValue(); + } // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. @@ -8714,6 +8827,16 @@ static bool isAllOnes(SDValue V) { return C && C->isAllOnesValue(); } +static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) { + if (V.getOpcode() != ISD::TRUNCATE) + return false; + + SDValue VOp0 = V.getOperand(0); + unsigned InBits = VOp0.getValueSizeInBits(); + unsigned Bits = V.getValueSizeInBits(); + return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); +} + SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); @@ -8728,46 +8851,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = NewCond; } - // Handle the following cases related to max and min: - // (a > b) ? (a-b) : 0 - // (a >= b) ? (a-b) : 0 - // (b < a) ? (a-b) : 0 - // (b <= a) ? (a-b) : 0 - // Comparison is removed to use EFLAGS from SUB. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2)) - if (Cond.getOpcode() == X86ISD::SETCC && - Cond.getOperand(1).getOpcode() == X86ISD::CMP && - (Op1.getOpcode() == ISD::SUB || Op1.getOpcode() == X86ISD::SUB) && - C->getAPIntValue() == 0) { - SDValue Cmp = Cond.getOperand(1); - unsigned CC = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue(); - if ((DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(0)) && - DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(1)) && - (CC == X86::COND_G || CC == X86::COND_GE || - CC == X86::COND_A || CC == X86::COND_AE)) || - (DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(1)) && - DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(0)) && - (CC == X86::COND_L || CC == X86::COND_LE || - CC == X86::COND_B || CC == X86::COND_BE))) { - - if (Op1.getOpcode() == ISD::SUB) { - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i32); - SDValue New = DAG.getNode(X86ISD::SUB, DL, VTs, - Op1.getOperand(0), Op1.getOperand(1)); - DAG.ReplaceAllUsesWith(Op1, New); - Op1 = New; - } - - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - unsigned NewCC = (CC == X86::COND_G || CC == X86::COND_GE || - CC == X86::COND_L || - CC == X86::COND_LE) ? 
X86::COND_GE : X86::COND_AE; - SDValue Ops[] = { Op2, Op1, DAG.getConstant(NewCC, MVT::i8), - SDValue(Op1.getNode(), 1) }; - return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); - } - } - // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y @@ -8788,11 +8871,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // (select (x != 0), -1, 0) -> neg & sbb // (select (x == 0), 0, -1) -> neg & sbb if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y)) - if (YC->isNullValue() && + if (YC->isNullValue() && (isAllOnes(Op1) == (CondCode == X86::COND_NE))) { SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); - SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, - DAG.getConstant(0, CmpOp0.getValueType()), + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, + DAG.getConstant(0, CmpOp0.getValueType()), CmpOp0); SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, MVT::i8), @@ -8883,9 +8966,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - // Look pass the truncate. - if (Cond.getOpcode() == ISD::TRUNCATE) - Cond = Cond.getOperand(0); + // Look past the truncate if the high bits are known zero. + if (isTruncWithZeroHighBitsInput(Cond, DAG)) + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. @@ -8908,7 +8991,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // a < b ? 0 : -1 -> RES = setcc_carry // a >= b ? -1 : 0 -> RES = setcc_carry // a >= b ? 0 : -1 -> RES = ~setcc_carry - if (Cond.getOpcode() == X86ISD::CMP) { + if (Cond.getOpcode() == X86ISD::SUB) { Cond = ConvertCmpIfNecessary(Cond, DAG); unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue(); @@ -9192,9 +9275,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - // Look pass the truncate. - if (Cond.getOpcode() == ISD::TRUNCATE) - Cond = Cond.getOperand(0); + // Look past the truncate if the high bits are known zero. + if (isTruncWithZeroHighBitsInput(Cond, DAG)) + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. @@ -9459,8 +9542,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, SDValue ShOps[4]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); - ShOps[2] = DAG.getUNDEF(MVT::i32); - ShOps[3] = DAG.getUNDEF(MVT::i32); + ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4); // The return type has to be a 128-bit type with the same element @@ -9503,8 +9585,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_sse_comieq_ss: case Intrinsic::x86_sse2_ucomigt_sd: case Intrinsic::x86_sse2_ucomige_sd: case Intrinsic::x86_sse2_ucomineq_sd: { - unsigned Opc = 0; - ISD::CondCode CC = ISD::SETCC_INVALID; + unsigned Opc; + ISD::CondCode CC; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::x86_sse_comieq_ss: @@ -9578,55 +9660,102 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + // Arithmetic intrinsics.
case Intrinsic::x86_sse2_pmulu_dq: case Intrinsic::x86_avx2_pmulu_dq: return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + + // SSE3/AVX horizontal add/sub intrinsics case Intrinsic::x86_sse3_hadd_ps: case Intrinsic::x86_sse3_hadd_pd: case Intrinsic::x86_avx_hadd_ps_256: case Intrinsic::x86_avx_hadd_pd_256: - return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse3_hsub_ps: case Intrinsic::x86_sse3_hsub_pd: case Intrinsic::x86_avx_hsub_ps_256: case Intrinsic::x86_avx_hsub_pd_256: - return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_ssse3_phadd_w_128: case Intrinsic::x86_ssse3_phadd_d_128: case Intrinsic::x86_avx2_phadd_w: case Intrinsic::x86_avx2_phadd_d: - return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_ssse3_phsub_w_128: case Intrinsic::x86_ssse3_phsub_d_128: case Intrinsic::x86_avx2_phsub_w: - case Intrinsic::x86_avx2_phsub_d: - return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(), + case Intrinsic::x86_avx2_phsub_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse3_hadd_ps: + case Intrinsic::x86_sse3_hadd_pd: + case Intrinsic::x86_avx_hadd_ps_256: + case Intrinsic::x86_avx_hadd_pd_256: + Opcode = X86ISD::FHADD; + break; + case Intrinsic::x86_sse3_hsub_ps: + case Intrinsic::x86_sse3_hsub_pd: + case Intrinsic::x86_avx_hsub_ps_256: + case Intrinsic::x86_avx_hsub_pd_256: + Opcode = X86ISD::FHSUB; + break; + case Intrinsic::x86_ssse3_phadd_w_128: + case Intrinsic::x86_ssse3_phadd_d_128: + case Intrinsic::x86_avx2_phadd_w: + case Intrinsic::x86_avx2_phadd_d: + Opcode = X86ISD::HADD; + break; + case Intrinsic::x86_ssse3_phsub_w_128: + case Intrinsic::x86_ssse3_phsub_d_128: + case Intrinsic::x86_avx2_phsub_w: + case Intrinsic::x86_avx2_phsub_d: + Opcode = X86ISD::HSUB; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + } + + // AVX2 variable shift intrinsics case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: case Intrinsic::x86_avx2_psllv_d_256: case Intrinsic::x86_avx2_psllv_q_256: - return DAG.getNode(ISD::SHL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psrlv_d: case Intrinsic::x86_avx2_psrlv_q: case Intrinsic::x86_avx2_psrlv_d_256: case Intrinsic::x86_avx2_psrlv_q_256: - return DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psrav_d: - case Intrinsic::x86_avx2_psrav_d_256: - return DAG.getNode(ISD::SRA, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psrav_d_256: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
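// (Illustrative note, not part of the original change.) The AVX2 variable
// shifts (vpsllvd and friends) shift each element by its own per-lane
// amount, which is exactly the semantics of the generic ISD::SHL/SRL/SRA
// nodes on vectors, so this group lowers to generic nodes rather than to
// X86-specific ones.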
+ case Intrinsic::x86_avx2_psllv_d: + case Intrinsic::x86_avx2_psllv_q: + case Intrinsic::x86_avx2_psllv_d_256: + case Intrinsic::x86_avx2_psllv_q_256: + Opcode = ISD::SHL; + break; + case Intrinsic::x86_avx2_psrlv_d: + case Intrinsic::x86_avx2_psrlv_q: + case Intrinsic::x86_avx2_psrlv_d_256: + case Intrinsic::x86_avx2_psrlv_q_256: + Opcode = ISD::SRL; + break; + case Intrinsic::x86_avx2_psrav_d: + case Intrinsic::x86_avx2_psrav_d_256: + Opcode = ISD::SRA; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_psign_b_128: case Intrinsic::x86_ssse3_psign_w_128: case Intrinsic::x86_ssse3_psign_d_128: @@ -9635,15 +9764,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_psign_d: return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_insertps: return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx_vperm2f128_ps_256: case Intrinsic::x86_avx_vperm2f128_pd_256: case Intrinsic::x86_avx_vperm2f128_si_256: case Intrinsic::x86_avx2_vperm2i128: return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: // Operands intentionally swapped. Mask is last operand to intrinsic, @@ -9673,7 +9805,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx_vtestc_pd_256: case Intrinsic::x86_avx_vtestnzc_pd_256: { bool IsTestPacked = false; - unsigned X86CC = 0; + unsigned X86CC; switch (IntNo) { default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); case Intrinsic::x86_avx_vtestz_ps: @@ -9724,44 +9856,93 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_psll_w: case Intrinsic::x86_avx2_psll_d: case Intrinsic::x86_avx2_psll_q: - return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_psrl_w: case Intrinsic::x86_sse2_psrl_d: case Intrinsic::x86_sse2_psrl_q: case Intrinsic::x86_avx2_psrl_w: case Intrinsic::x86_avx2_psrl_d: case Intrinsic::x86_avx2_psrl_q: - return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_psra_w: case Intrinsic::x86_sse2_psra_d: case Intrinsic::x86_avx2_psra_w: - case Intrinsic::x86_avx2_psra_d: - return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(), + case Intrinsic::x86_avx2_psra_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
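// (Illustrative note, not part of the original change.) X86ISD::VSHL/VSRL/
// VSRA model the psllw/pslld/psllq family, which reads a single shift amount
// from the low 64 bits of an XMM register; the *I forms handled in the next
// group take an immediate instead (see getTargetVShiftNode, which builds the
// shift-amount vector when needed).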
+ case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + Opcode = X86ISD::VSHL; + break; + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + Opcode = X86ISD::VSRL; + break; + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psra_d: + Opcode = X86ISD::VSRA; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + } + + // SSE/AVX immediate shift intrinsics case Intrinsic::x86_sse2_pslli_w: case Intrinsic::x86_sse2_pslli_d: case Intrinsic::x86_sse2_pslli_q: case Intrinsic::x86_avx2_pslli_w: case Intrinsic::x86_avx2_pslli_d: case Intrinsic::x86_avx2_pslli_q: - return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrli_w: case Intrinsic::x86_sse2_psrli_d: case Intrinsic::x86_sse2_psrli_q: case Intrinsic::x86_avx2_psrli_w: case Intrinsic::x86_avx2_psrli_d: case Intrinsic::x86_avx2_psrli_q: - return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrai_w: case Intrinsic::x86_sse2_psrai_d: case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: - return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(), + case Intrinsic::x86_avx2_psrai_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + Opcode = X86ISD::VSHLI; + break; + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + Opcode = X86ISD::VSRLI; + break; + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + Opcode = X86ISD::VSRAI; + break; + } + return getTargetVShiftNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), DAG); + } + // Fix vector shift instructions where the last operand is a non-immediate // i32 value. case Intrinsic::x86_mmx_pslli_w: @@ -9776,8 +9957,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const if (isa<ConstantSDNode>(ShAmt)) return SDValue(); - unsigned NewIntNo = 0; + unsigned NewIntNo; switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::x86_mmx_pslli_w: NewIntNo = Intrinsic::x86_mmx_psll_w; break; @@ -9802,7 +9984,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_mmx_psrai_d: NewIntNo = Intrinsic::x86_mmx_psra_d; break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
} // The vector shift intrinsics with scalars uses 32b shift amounts but @@ -9818,6 +9999,84 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); } + case Intrinsic::x86_sse42_pcmpistria128: + case Intrinsic::x86_sse42_pcmpestria128: + case Intrinsic::x86_sse42_pcmpistric128: + case Intrinsic::x86_sse42_pcmpestric128: + case Intrinsic::x86_sse42_pcmpistrio128: + case Intrinsic::x86_sse42_pcmpestrio128: + case Intrinsic::x86_sse42_pcmpistris128: + case Intrinsic::x86_sse42_pcmpestris128: + case Intrinsic::x86_sse42_pcmpistriz128: + case Intrinsic::x86_sse42_pcmpestriz128: { + unsigned Opcode; + unsigned X86CC; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse42_pcmpistria128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpestria128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpistric128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpestric128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpistrio128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpestrio128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpistris128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpestris128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpistriz128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_E; + break; + case Intrinsic::x86_sse42_pcmpestriz128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_E; + break; + } + SmallVector<SDValue, 5> NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), + SDValue(PCMP.getNode(), 1)); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } + + case Intrinsic::x86_sse42_pcmpistri128: + case Intrinsic::x86_sse42_pcmpestri128: { + unsigned Opcode; + if (IntNo == Intrinsic::x86_sse42_pcmpistri128) + Opcode = X86ISD::PCMPISTRI; + else + Opcode = X86ISD::PCMPESTRI; + + SmallVector<SDValue, 5> NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + } } } @@ -10231,7 +10490,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getSizeInBits() == 256 && VT.isInteger() && + assert(VT.is256BitVector() && VT.isInteger() && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); @@ -10256,14 +10515,14 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType().getSizeInBits() == 256 && + assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG 
&DAG) const { - assert(Op.getValueType().getSizeInBits() == 256 && + assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); @@ -10273,7 +10532,7 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); // Decompose 256-bit ops into smaller 128-bit ops. - if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) + if (VT.is256BitVector() && !Subtarget->hasAVX2()) return Lower256IntArith(Op, DAG); assert((VT == MVT::v2i64 || VT == MVT::v4i64) && @@ -10503,7 +10762,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } // Decompose 256-bit shifts into smaller 128-bit shifts. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { unsigned NumElems = VT.getVectorNumElements(); MVT EltVT = VT.getVectorElementType().getSimpleVT(); EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); @@ -10992,9 +11251,9 @@ static void ReplaceATOMIC_LOAD(SDNode *Node, Results.push_back(Swap.getValue(1)); } -void X86TargetLowering:: +static void ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG, unsigned NewOp) const { + SelectionDAG &DAG, unsigned NewOp) { DebugLoc dl = Node->getDebugLoc(); assert (Node->getValueType(0) == MVT::i64 && "Only know how to expand i64 atomics"); @@ -11092,7 +11351,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Regs64bit ? X86::RBX : X86::EBX, swapInL, cpInH.getValue(1)); swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, - Regs64bit ? X86::RCX : X86::ECX, + Regs64bit ? X86::RCX : X86::ECX, swapInH, swapInL.getValue(1)); SDValue Ops[] = { swapInH.getValue(0), N->getOperand(1), @@ -11115,26 +11374,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG); - return; case ISD::ATOMIC_LOAD_AND: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG); - return; case ISD::ATOMIC_LOAD_NAND: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG); - return; case ISD::ATOMIC_LOAD_OR: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG); - return; case ISD::ATOMIC_LOAD_SUB: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG); - return; case ISD::ATOMIC_LOAD_XOR: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG); - return; - case ISD::ATOMIC_SWAP: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG); + case ISD::ATOMIC_SWAP: { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::ATOMIC_LOAD_ADD: + Opc = X86ISD::ATOMADD64_DAG; + break; + case ISD::ATOMIC_LOAD_AND: + Opc = X86ISD::ATOMAND64_DAG; + break; + case ISD::ATOMIC_LOAD_NAND: + Opc = X86ISD::ATOMNAND64_DAG; + break; + case ISD::ATOMIC_LOAD_OR: + Opc = X86ISD::ATOMOR64_DAG; + break; + case ISD::ATOMIC_LOAD_SUB: + Opc = X86ISD::ATOMSUB64_DAG; + break; + case ISD::ATOMIC_LOAD_XOR: + Opc = X86ISD::ATOMXOR64_DAG; + break; + case ISD::ATOMIC_SWAP: + Opc = X86ISD::ATOMSWAP64_DAG; + break; + } + ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc); return; + } case ISD::ATOMIC_LOAD: ReplaceATOMIC_LOAD(N, Results, DAG); } @@ -11194,6 +11467,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; + case X86ISD::FMAXC: return 
"X86ISD::FMAXC"; + case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; @@ -11212,7 +11487,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; + case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; + case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -11273,6 +11550,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; case X86ISD::SAHF: return "X86ISD::SAHF"; case X86ISD::RDRAND: return "X86ISD::RDRAND"; + case X86ISD::FMADD: return "X86ISD::FMADD"; + case X86ISD::FMSUB: return "X86ISD::FMSUB"; + case X86ISD::FNMADD: return "X86ISD::FNMADD"; + case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; + case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; + case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; } } @@ -11408,7 +11691,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, // FIXME: This collection of masks seems suspect. if (NumElts == 2) return true; - if (NumElts == 4 && VT.getSizeInBits() == 128) { + if (NumElts == 4 && VT.is128BitVector()) { return (isMOVLMask(Mask, VT) || isCommutedMOVLMask(Mask, VT, true) || isSHUFPMask(Mask, VT, Subtarget->hasAVX()) || @@ -11834,8 +12117,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, MIB.addOperand(Op); } BuildMI(*BB, MI, dl, - TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr), - MI->getOperand(0).getReg()) + TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); @@ -11868,24 +12150,6 @@ X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { } MachineBasicBlock * -X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const { - DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - // First arg in ECX, the second in EAX. - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) - .addReg(MI->getOperand(0).getReg()); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) - .addReg(MI->getOperand(1).getReg()); - - // The instruction doesn't actually take any operands though. - BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr)); - - MI->eraseFromParent(); // The pseudo is gone now. - return BB; -} - -MachineBasicBlock * X86TargetLowering::EmitVAARG64WithCustomInserter( MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -12675,185 +12939,208 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // String/text processing lowering. 
case X86::PCMPISTRM128REG: case X86::VPCMPISTRM128REG: - return EmitPCMP(MI, BB, 3, false /* in-mem */); case X86::PCMPISTRM128MEM: case X86::VPCMPISTRM128MEM: - return EmitPCMP(MI, BB, 3, true /* in-mem */); case X86::PCMPESTRM128REG: case X86::VPCMPESTRM128REG: - return EmitPCMP(MI, BB, 5, false /* in mem */); case X86::PCMPESTRM128MEM: - case X86::VPCMPESTRM128MEM: - return EmitPCMP(MI, BB, 5, true /* in mem */); + case X86::VPCMPESTRM128MEM: { + unsigned NumArgs; + bool MemArg; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRM128REG: + case X86::VPCMPISTRM128REG: + NumArgs = 3; MemArg = false; break; + case X86::PCMPISTRM128MEM: + case X86::VPCMPISTRM128MEM: + NumArgs = 3; MemArg = true; break; + case X86::PCMPESTRM128REG: + case X86::VPCMPESTRM128REG: + NumArgs = 5; MemArg = false; break; + case X86::PCMPESTRM128MEM: + case X86::VPCMPESTRM128MEM: + NumArgs = 5; MemArg = true; break; + } + return EmitPCMP(MI, BB, NumArgs, MemArg); + } // Thread synchronization. case X86::MONITOR: return EmitMonitor(MI, BB); - case X86::MWAIT: - return EmitMwait(MI, BB); // Atomic Lowering. - case X86::ATOMAND32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMOR32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, - X86::OR32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMXOR32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, - X86::XOR32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMNAND32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass, true); case X86::ATOMMIN32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); case X86::ATOMMAX32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr); case X86::ATOMUMIN32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr); case X86::ATOMUMAX32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr); + case X86::ATOMMIN16: + case X86::ATOMMAX16: + case X86::ATOMUMIN16: + case X86::ATOMUMAX16: + case X86::ATOMMIN64: + case X86::ATOMMAX64: + case X86::ATOMUMIN64: + case X86::ATOMUMAX64: { + unsigned Opc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break; + case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break; + case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break; + case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break; + case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break; + case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break; + case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break; + case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break; + case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break; + case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break; + case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break; + case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break; + // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. 
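// (Illustrative note, not part of the original change.) The min/max pseudos
// expand to a compare plus a CMOVcc inside an LCMPXCHG retry loop; the CMOV
// opcode selected above encodes both the signedness and the direction of the
// comparison (e.g. CMOVL32rr for 32-bit signed min).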
+ } + return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc); + } + + case X86::ATOMAND32: + case X86::ATOMOR32: + case X86::ATOMXOR32: + case X86::ATOMNAND32: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND32: + RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break; + case X86::ATOMOR32: + RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break; + case X86::ATOMXOR32: + RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break; + case X86::ATOMNAND32: + RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV32rm, X86::LCMPXCHG32, + X86::NOT32r, X86::EAX, + &X86::GR32RegClass, Invert); + } case X86::ATOMAND16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, - X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); case X86::ATOMOR16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, - X86::OR16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); case X86::ATOMXOR16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, - X86::XOR16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); - case X86::ATOMNAND16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, - X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, + case X86::ATOMNAND16: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND16: + RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break; + case X86::ATOMOR16: + RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break; + case X86::ATOMXOR16: + RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break; + case X86::ATOMNAND16: + RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV16rm, X86::LCMPXCHG16, X86::NOT16r, X86::AX, - &X86::GR16RegClass, true); - case X86::ATOMMIN16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr); - case X86::ATOMMAX16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr); - case X86::ATOMUMIN16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr); - case X86::ATOMUMAX16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr); + &X86::GR16RegClass, Invert); + } case X86::ATOMAND8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, - X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); case X86::ATOMOR8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, - X86::OR8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); case X86::ATOMXOR8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, - X86::XOR8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); - case X86::ATOMNAND8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, - X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, + case X86::ATOMNAND8: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND8: + RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break; + case X86::ATOMOR8: + RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break; + case X86::ATOMXOR8: + RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break; + case 
X86::ATOMNAND8: + RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV8rm, X86::LCMPXCHG8, X86::NOT8r, X86::AL, - &X86::GR8RegClass, true); - // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. + &X86::GR8RegClass, Invert); + } + // This group is for 64-bit host. case X86::ATOMAND64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, - X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); case X86::ATOMOR64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, - X86::OR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); case X86::ATOMXOR64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, - X86::XOR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); - case X86::ATOMNAND64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, - X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, + case X86::ATOMNAND64: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND64: + RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break; + case X86::ATOMOR64: + RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break; + case X86::ATOMXOR64: + RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break; + case X86::ATOMNAND64: + RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV64rm, X86::LCMPXCHG64, X86::NOT64r, X86::RAX, - &X86::GR64RegClass, true); - case X86::ATOMMIN64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr); - case X86::ATOMMAX64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr); - case X86::ATOMUMIN64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr); - case X86::ATOMUMAX64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr); + &X86::GR64RegClass, Invert); + } // This group does 64-bit operations on a 32-bit host. 
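// (Illustrative note, not part of the original change.) Each 6432 pseudo is
// expanded with a pair of 32-bit opcodes, one per half of the i64 value.
// Only ADD and SUB need distinct low/high opcodes so the carry or borrow
// propagates between the halves: ATOMADD6432 uses ADD32rr for the low half
// and ADC32rr for the high half, as the table below shows.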
case X86::ATOMAND6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::AND32rr, X86::AND32rr, - X86::AND32ri, X86::AND32ri, - false); case X86::ATOMOR6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::OR32rr, X86::OR32rr, - X86::OR32ri, X86::OR32ri, - false); case X86::ATOMXOR6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::XOR32rr, X86::XOR32rr, - X86::XOR32ri, X86::XOR32ri, - false); case X86::ATOMNAND6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::AND32rr, X86::AND32rr, - X86::AND32ri, X86::AND32ri, - true); case X86::ATOMADD6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::ADD32rr, X86::ADC32rr, - X86::ADD32ri, X86::ADC32ri, - false); case X86::ATOMSUB6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::SUB32rr, X86::SBB32rr, - X86::SUB32ri, X86::SBB32ri, - false); - case X86::ATOMSWAP6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::MOV32rr, X86::MOV32rr, - X86::MOV32ri, X86::MOV32ri, - false); + case X86::ATOMSWAP6432: { + bool Invert = false; + unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND6432: + RegOpcL = RegOpcH = X86::AND32rr; + ImmOpcL = ImmOpcH = X86::AND32ri; + break; + case X86::ATOMOR6432: + RegOpcL = RegOpcH = X86::OR32rr; + ImmOpcL = ImmOpcH = X86::OR32ri; + break; + case X86::ATOMXOR6432: + RegOpcL = RegOpcH = X86::XOR32rr; + ImmOpcL = ImmOpcH = X86::XOR32ri; + break; + case X86::ATOMNAND6432: + RegOpcL = RegOpcH = X86::AND32rr; + ImmOpcL = ImmOpcH = X86::AND32ri; + Invert = true; + break; + case X86::ATOMADD6432: + RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr; + ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri; + break; + case X86::ATOMSUB6432: + RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr; + ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri; + break; + case X86::ATOMSWAP6432: + RegOpcL = RegOpcH = X86::MOV32rr; + ImmOpcL = ImmOpcH = X86::MOV32ri; + break; + } + return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH, + ImmOpcL, ImmOpcH, Invert); + } + case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); @@ -13043,7 +13330,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, false/*WriteMem*/); return DAG.getNode(ISD::BITCAST, dl, VT, ResNode); } - } + } // Emit a zeroed vector and insert the desired subvector on its // first half. @@ -13086,12 +13373,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // Combine 256-bit vector shuffles. This is only profitable when in AVX mode - if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 && + if (Subtarget->hasAVX() && VT.is256BitVector() && N->getOpcode() == ISD::VECTOR_SHUFFLE) return PerformShuffleCombine256(N, DAG, DCI, Subtarget); // Only handle 128 wide vector from here on. - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return SDValue(); // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, @@ -13109,7 +13396,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, /// a sequence of vector shuffle operations. 
/// It is possible when we truncate 256-bit vector to 128-bit vector -SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, +SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const { if (!DCI.isBeforeLegalizeOps()) return SDValue(); @@ -13151,8 +13438,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, // PSHUFD static const int ShufMask1[] = {0, 2, 0, 0}; - OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT), ShufMask1); - OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT), ShufMask1); + SDValue Undef = DAG.getUNDEF(VT); + OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1); // MOVLHPS static const int ShufMask2[] = {0, 1, 4, 5}; @@ -13210,10 +13498,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}; - OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, DAG.getUNDEF(MVT::v16i8), - ShufMask1); - OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, DAG.getUNDEF(MVT::v16i8), - ShufMask1); + SDValue Undef = DAG.getUNDEF(MVT::v16i8); + OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1); OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); @@ -13718,6 +14005,88 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Check whether a boolean test is testing a boolean value generated by +// X86ISD::SETCC. If so, return the operand of that SETCC and the proper +// condition code. +// +// Simplify the following patterns: +// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or +// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ) +// to (Op EFLAGS Cond) +// +// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or +// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ) +// to (Op EFLAGS !Cond) +// +// where Op could be BRCOND or CMOV. +// +static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { + // Quit unless this is a CMP, or a SUB whose value result is unused. + if (Cmp.getOpcode() != X86ISD::CMP && + (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0))) + return SDValue(); + + // Quit if not used as a boolean value. + if (CC != X86::COND_E && CC != X86::COND_NE) + return SDValue(); + + // Check CMP operands. One of them should be 0 or 1 and the other should be + // a SETCC or a value extended from one. + SDValue Op1 = Cmp.getOperand(0); + SDValue Op2 = Cmp.getOperand(1); + + SDValue SetCC; + const ConstantSDNode* C = 0; + bool needOppositeCond = (CC == X86::COND_E); + + if ((C = dyn_cast<ConstantSDNode>(Op1))) + SetCC = Op2; + else if ((C = dyn_cast<ConstantSDNode>(Op2))) + SetCC = Op1; + else // Quit if neither operand is a constant. + return SDValue(); + + if (C->getZExtValue() == 1) + needOppositeCond = !needOppositeCond; + else if (C->getZExtValue() != 0) + // Quit if the constant is neither 0 nor 1. + return SDValue(); + + // Skip 'zext' node. + if (SetCC.getOpcode() == ISD::ZERO_EXTEND) + SetCC = SetCC.getOperand(0); + + // Quit if not SETCC. + // FIXME: So far we only handle the boolean value generated from SETCC. If + // there are other ways to generate boolean values, we need to handle them + // here as well. + if (SetCC.getOpcode() != X86ISD::SETCC) + return SDValue(); + + // Set the condition code, or its opposite if necessary.
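// (Illustrative note, not part of the original change.) End to end, the
// combine rewrites
//   %b = (X86ISD::SETCC cond, EFLAGS)
//   (BRCOND (X86ISD::CMP %b, 0), NE)
// into (BRCOND EFLAGS, cond), removing both the materialized boolean and
// the re-test of it.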
+ CC = X86::CondCode(SetCC.getConstantOperandVal(0)); + if (needOppositeCond) + CC = X86::GetOppositeBranchCondition(CC); + + return SetCC.getOperand(1); +} + +static bool IsValidFCMOVCondition(X86::CondCode CC) { + switch (CC) { + default: + return false; + case X86::COND_B: + case X86::COND_BE: + case X86::COND_E: + case X86::COND_P: + case X86::COND_AE: + case X86::COND_A: + case X86::COND_NE: + case X86::COND_NP: + return true; + } +} + /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { @@ -13731,6 +14100,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, SDValue TrueOp = N->getOperand(1); X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); SDValue Cond = N->getOperand(3); + if (CC == X86::COND_E || CC == X86::COND_NE) { switch (Cond.getOpcode()) { default: break; @@ -13742,6 +14112,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, } } + SDValue Flags; + + Flags = BoolTestSetCCCombine(Cond, CC); + if (Flags.getNode() && + // Extra check as FCMOV only supports a subset of the X86 condition codes. + (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) { + SDValue Ops[] = { FalseOp, TrueOp, + DAG.getConstant(CC, MVT::i8), Flags }; + return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), + Ops, array_lengthof(Ops)); + } + // If this is a select between two integer constants, try to do some // optimizations. Note that the operands are ordered the opposite of SELECT // operands. @@ -14164,7 +14546,7 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) { // Sometimes the operand may come from a insert_subvector building a 256-bit // allones vector - if (VT.getSizeInBits() == 256 && + if (VT.is256BitVector() && N->getOpcode() == ISD::INSERT_SUBVECTOR) { SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); @@ -14609,7 +14991,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // On Sandy Bridge, 256-bit memory operations are executed by two // 128-bit ports. However, on Haswell it is better to issue a single 256-bit // memory operation. - if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2() && + if (VT.is256BitVector() && !Subtarget->hasAVX2() && StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && StoredVal.getNumOperands() == 2) { SDValue Value0 = StoredVal.getOperand(0); @@ -14992,6 +15374,29 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and +/// X86ISD::FMAX nodes. +static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { + assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX); + + // Only perform optimizations when unsafe math is enabled. + if (!DAG.getTarget().Options.UnsafeFPMath) + return SDValue(); + + // In unsafe-math mode, convert the FMAX and FMIN nodes into FMAXC and FMINC, + // which are commutative: unlike FMAX/FMIN, they may swap their operands + // because the x86 NaN and signed-zero ordering semantics no longer need to + // be preserved. + unsigned NewOp = 0; + switch (N->getOpcode()) { + default: llvm_unreachable("unknown opcode"); + case X86ISD::FMIN: NewOp = X86ISD::FMINC; break; + case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break; + } + + return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0), + N->getOperand(0), N->getOperand(1)); +} + + /// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { // FAND(0.0, x) -> 0.0 @@ -15067,19 +15472,19 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, // concat the vectors to original VT unsigned NumElems = OpVT.getVectorNumElements(); + SDValue Undef = DAG.getUNDEF(OpVT); + SmallVector<int,8> ShufMask1(NumElems, -1); for (unsigned i = 0; i != NumElems/2; ++i) ShufMask1[i] = i; - SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - &ShufMask1[0]); + SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]); SmallVector<int,8> ShufMask2(NumElems, -1); for (unsigned i = 0; i != NumElems/2; ++i) ShufMask2[i] = i + NumElems/2; - SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - &ShufMask2[0]); + SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), VT.getVectorNumElements()/2); @@ -15092,6 +15497,40 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + EVT ScalarVT = VT.getScalarType(); + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA()) + return SDValue(); + + SDValue A = N->getOperand(0); + SDValue B = N->getOperand(1); + SDValue C = N->getOperand(2); + + bool NegA = (A.getOpcode() == ISD::FNEG); + bool NegB = (B.getOpcode() == ISD::FNEG); + bool NegC = (C.getOpcode() == ISD::FNEG); + + // Negative multiplication when NegA xor NegB + bool NegMul = (NegA != NegB); + if (NegA) + A = A.getOperand(0); + if (NegB) + B = B.getOperand(0); + if (NegC) + C = C.getOperand(0); + + unsigned Opcode; + if (!NegMul) + Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB; + else + Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB; + return DAG.getNode(Opcode, dl, VT, A, B, C); +} + static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -15164,7 +15603,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); + SDValue RHS = N->getOperand(1); if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0))) @@ -15187,19 +15626,50 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { - unsigned X86CC = N->getConstantOperandVal(0); - SDValue EFLAG = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); + X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); + SDValue EFLAGS = N->getOperand(1); // Materialize "setb reg" as "sbb reg,reg", since it can be extended without // a zext and produces an all-ones bit which is more useful than 0/1 in some // cases. 
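// (Illustrative note, not part of the original change.) The "sbb" trick:
// after a compare that sets CF, 'sbb %eax, %eax' computes eax - eax - CF,
// i.e. 0 or -1. So for
//   unsigned mask = (a < b) ? ~0u : 0u;
// a cmp followed by sbb yields the all-ones mask directly, with no setb and
// no zero extension.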
- if (X86CC == X86::COND_B) + if (CC == X86::COND_B) return DAG.getNode(ISD::AND, DL, MVT::i8, DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), EFLAG), + DAG.getConstant(CC, MVT::i8), EFLAGS), DAG.getConstant(1, MVT::i8)); + SDValue Flags; + + Flags = BoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); + } + + return SDValue(); +} + +// Optimize branch condition evaluation. +// +static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + DebugLoc DL = N->getDebugLoc(); + SDValue Chain = N->getOperand(0); + SDValue Dest = N->getOperand(1); + SDValue EFLAGS = N->getOperand(3); + X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); + + SDValue Flags; + + Flags = BoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, + Flags); + } + return SDValue(); } @@ -15408,6 +15878,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); + case X86ISD::FMIN: + case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG); case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); @@ -15417,6 +15889,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); + case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGN: case X86ISD::UNPCKH: @@ -15431,6 +15904,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERMILP: case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); + case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget); } return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 78e4d75..74f5167 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -137,10 +137,6 @@ namespace llvm { /// relative displacements. WrapperRIP, - /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word - /// of an XMM vector, with the high word zero filled. - MOVQ2DQ, - /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector /// to an MMX vector. If you think this is too close to the previous /// mnemonic, so do I; blame Intel. @@ -199,6 +195,9 @@ namespace llvm { /// FMAX, FMIN, + /// FMAXC, FMINC - Commutative FMIN and FMAX. + FMAXC, FMINC, + /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal /// approximation. Note that these typically require refinement /// in order to obtain suitable precision. @@ -231,6 +230,9 @@ namespace llvm { // VSEXT_MOVL - Vector move low and sign extend. VSEXT_MOVL, + // VFPEXT - Vector FP extend. 
+ VFPEXT, + // VSHL, VSRL - 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, @@ -294,6 +296,14 @@ namespace llvm { // PMULUDQ - Vector multiply packed unsigned doubleword integers PMULUDQ, + // FMA nodes + FMADD, + FNMADD, + FMSUB, + FNMSUB, + FMADDSUB, + FMSUBADD, + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded // with control flow. @@ -325,6 +335,10 @@ namespace llvm { // RDRAND - Get a random integer and indicate whether it is valid in CF. RDRAND, + // PCMP*STRI + PCMPISTRI, + PCMPESTRI, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. @@ -597,6 +611,12 @@ namespace llvm { virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; virtual bool isZExtFree(EVT VT1, EVT VT2) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. + virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } + /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. @@ -656,7 +676,8 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const; /// getStackCookieLocation - Return true if the target stores stack /// protector cookies at a fixed offset in some non-standard address @@ -813,6 +834,8 @@ namespace llvm { SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const; SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const; + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -844,9 +867,6 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const; - void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG, unsigned NewOp) const; - /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. /// This takes the instruction to expand, the associated machine basic @@ -933,7 +953,8 @@ namespace llvm { }; namespace X86 { - FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); } } diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index b6ba68f..f790611 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1132,8 +1132,10 @@ defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m, X86xor_flag, xor, 1, 0>; defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m, X86add_flag, add, 1, 1>; +let isCompare = 1 in { defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m, X86sub_flag, sub, 0, 0>; +} // Arithmetic. 
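Marking SUB with isCompare above feeds the SUB-to-CMP peephole added later in this patch (see optimizeCompareInstr below). The "Arithmetic" section that follows groups instructions under let Uses = [EFLAGS], i.e. ones that consume flags produced earlier, ADC/SBB being the canonical case. A sketch of that dependency, assuming nothing beyond standard C++:

#include <cassert>
#include <cstdint>

int main() {
  // A 64-bit addition split into two 32-bit halves: the low ADD defines CF,
  // and the high ADC (an EFLAGS user) consumes it.
  uint32_t aLo = 0xFFFFFFFFu, aHi = 0, bLo = 1, bHi = 0;
  uint32_t lo = aLo + bLo;
  uint32_t cf = lo < aLo;        // carry out of the low half
  uint32_t hi = aHi + bHi + cf;  // what ADC computes
  assert(lo == 0 && hi == 1);    // 0xFFFFFFFF + 1 == 0x1'00000000
}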
let Uses = [EFLAGS] in { diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 0d5490a..2eb454d 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -39,12 +39,15 @@ let neverHasSideEffects = 1 in { // Sign/Zero extenders +let neverHasSideEffects = 1 in { def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, TB, OpSize; +let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, TB, OpSize; +} // neverHasSideEffects = 1 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; @@ -59,12 +62,15 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, TB; +let neverHasSideEffects = 1 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, TB, OpSize; +let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, TB, OpSize; +} // neverHasSideEffects = 1 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; @@ -82,6 +88,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class // instead of GR32. This allows them to operate on h registers on x86-64. 
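The flag changes in this hunk (neverHasSideEffects, mayLoad) appear to be needed because these MOVSX/MOVZX forms carry no ISel pattern from which the properties could be inferred; the operations themselves are unchanged. For reference, the two extensions in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  int8_t src = -0x70;  // byte pattern 0x90
  assert(static_cast<int16_t>(src) == -0x70);                        // movsx: replicate the sign bit
  assert(static_cast<uint16_t>(static_cast<uint8_t>(src)) == 0x90);  // movzx: clear the high bits
}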
+let neverHasSideEffects = 1, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", @@ -91,6 +98,7 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, TB; +} // MOVSX64rr8 always has a REX prefix and it has an 8-bit register // operand, which makes it a rare instruction with an 8-bit register diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 8802a2e..95ee7e5 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -16,159 +16,307 @@ //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { -multiclass fma3p_rm<bits<8> opc, string OpcodeStr> { -let neverHasSideEffects = 1 in { - def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - let mayLoad = 1 in - def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, f128mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - let mayLoad = 1 in - def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, f256mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; -} // neverHasSideEffects = 1 -} +multiclass fma3p_rm<bits<8> opc, string OpcodeStr, + PatFrag MemFrag128, PatFrag MemFrag256, + ValueType OpVT128, ValueType OpVT256, + SDPatternOperator Op = null_frag, bit MayLoad = 1> { + def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (OpVT128 (Op VR128:$src2, + VR128:$src1, VR128:$src3)))]>; -// Intrinsic for 132 pattern -multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr, - PatFrag MemFrag128, PatFrag MemFrag256, - Intrinsic Int128, Intrinsic Int256> { - def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src3, VR128:$src2))]>; - def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, f128mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (Int128 VR128:$src1, (MemFrag128 addr:$src3), VR128:$src2))]>; - def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src3, VR256:$src2))]>; - def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, f256mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, - (Int256 VR256:$src1, (MemFrag256 addr:$src3), VR256:$src2))]>; + let mayLoad = MayLoad in + def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, + (MemFrag128 
addr:$src3))))]>; + + def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1, + VR256:$src3)))]>; + + let mayLoad = MayLoad in + def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, f256mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR256:$dst, + (OpVT256 (Op VR256:$src2, VR256:$src1, + (MemFrag256 addr:$src3))))]>; } } // Constraints = "$src1 = $dst" multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, PatFrag MemFrag128, PatFrag MemFrag256, - Intrinsic Int128, Intrinsic Int256> { - defm r132 : fma3p_rm_int <opc132, !strconcat(OpcodeStr, - !strconcat("132", PackTy)), MemFrag128, MemFrag256, - Int128, Int256>; - defm r132 : fma3p_rm <opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>; - defm r213 : fma3p_rm <opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>; - defm r231 : fma3p_rm <opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>; + SDNode Op, ValueType OpTy128, ValueType OpTy256> { + defm r213 : fma3p_rm<opc213, + !strconcat(OpcodeStr, !strconcat("213", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256, Op, 0>; +let neverHasSideEffects = 1 in { + defm r132 : fma3p_rm<opc132, + !strconcat(OpcodeStr, !strconcat("132", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256>; + defm r231 : fma3p_rm<opc231, + !strconcat(OpcodeStr, !strconcat("231", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256>; +} // neverHasSideEffects = 1 } // Fused Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfmadd_ps, int_x86_fma_vfmadd_ps_256>; - defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfmsub_ps, int_x86_fma_vfmsub_ps_256>; + memopv8f32, X86Fmadd, v4f32, v8f32>; + defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32, + memopv8f32, X86Fmsub, v4f32, v8f32>; defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", - memopv4f32, memopv8f32, int_x86_fma_vfmaddsub_ps, - int_x86_fma_vfmaddsub_ps_256>; + memopv4f32, memopv8f32, X86Fmaddsub, + v4f32, v8f32>; defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", - memopv4f32, memopv8f32, int_x86_fma_vfmsubadd_ps, - int_x86_fma_vfmaddsub_ps_256>; + memopv4f32, memopv8f32, X86Fmsubadd, + v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmadd_pd, int_x86_fma_vfmadd_pd_256>, VEX_W; + memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W; defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmsub_pd, int_x86_fma_vfmsub_pd_256>, VEX_W; - defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmaddsub_pd, int_x86_fma_vfmaddsub_pd_256>, VEX_W; - defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmsubadd_pd, int_x86_fma_vfmsubadd_pd_256>, VEX_W; + memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W; + defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", + memopv2f64, memopv4f64, X86Fmaddsub, + v2f64, v4f64>, VEX_W; + defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", + memopv2f64, 
memopv4f64, X86Fmsubadd, + v2f64, v4f64>, VEX_W; } // Fused Negative Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfnmadd_ps, int_x86_fma_vfnmadd_ps_256>; + memopv8f32, X86Fnmadd, v4f32, v8f32>; defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfnmsub_ps, int_x86_fma_vfnmsub_ps_256>; + memopv8f32, X86Fnmsub, v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfnmadd_pd, int_x86_fma_vfnmadd_pd_256>, VEX_W; - defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfnmsub_pd, int_x86_fma_vfnmsub_pd_256>, VEX_W; + memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W; + defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", + memopv2f64, memopv4f64, X86Fnmsub, v2f64, + v4f64>, VEX_W; } +let Predicates = [HasFMA] in { + def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, + 
(memopv2f64 addr:$src3)), + (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFNMADDPDr213m VR128:$src1, VR128:$src2, 
addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + +} // Predicates = [HasFMA] let Constraints = "$src1 = $dst" in { multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, - RegisterClass RC> { -let neverHasSideEffects = 1 in { - def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, RC:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - let mayLoad = 1 in - def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, RC:$src2, x86memop:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; -} // neverHasSideEffects = 1 + RegisterClass RC, ValueType OpVT, PatFrag mem_frag, + SDPatternOperator OpNode = null_frag, bit MayLoad = 1> { + def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set RC:$dst, + (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; + let mayLoad = MayLoad in + def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, RC:$src2, x86memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set RC:$dst, + (OpVT (OpNode RC:$src2, RC:$src1, + (mem_frag addr:$src3))))]>; } multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic IntId> { + ComplexPattern mem_cpat, Intrinsic IntId, + RegisterClass RC> { def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src3, VR128:$src2))]>; + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1, + VR128:$src3))]>; def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, memop:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (IntId VR128:$src1, mem_cpat:$src3, VR128:$src2))]>; + (ins VR128:$src1, VR128:$src2, memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, + (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>; } } // Constraints = "$src1 = $dst" multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpStr, Intrinsic IntF32, Intrinsic IntF64> { - defm SSr132 : fma3s_rm<opc132, !strconcat(OpStr, "132ss"), f32mem, FR32>; - defm SSr213 : fma3s_rm<opc213, !strconcat(OpStr, "213ss"), f32mem, FR32>; - defm SSr231 : fma3s_rm<opc231, !strconcat(OpStr, "231ss"), f32mem, FR32>; - defm SDr132 : fma3s_rm<opc132, 
!strconcat(OpStr, "132sd"), f64mem, FR64>, VEX_W; - defm SDr213 : fma3s_rm<opc213, !strconcat(OpStr, "213sd"), f64mem, FR64>, VEX_W; - defm SDr231 : fma3s_rm<opc231, !strconcat(OpStr, "231sd"), f64mem, FR64>, VEX_W; - defm SSr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132ss"), ssmem, - sse_load_f32, IntF32>; - defm SDr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132sd"), sdmem, - sse_load_f64, IntF64>; + string OpStr, string PackTy, Intrinsic Int, + SDNode OpNode, RegisterClass RC, ValueType OpVT, + X86MemOperand x86memop, Operand memop, PatFrag mem_frag, + ComplexPattern mem_cpat> { +let neverHasSideEffects = 1 in { + defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)), + x86memop, RC, OpVT, mem_frag>; + defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)), + x86memop, RC, OpVT, mem_frag>; +} + +defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + x86memop, RC, OpVT, mem_frag, OpNode, 0>, + fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + memop, mem_cpat, Int, RC>; +} + +multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpStr, Intrinsic IntF32, Intrinsic IntF64, + SDNode OpNode> { + defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode, + FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>; + defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode, + FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W; } -defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, - int_x86_fma_vfmadd_sd>, VEX_LIG; -defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, - int_x86_fma_vfmsub_sd>, VEX_LIG; +defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, + int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; +defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, + int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; -defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, - int_x86_fma_vfnmadd_sd>, VEX_LIG; -defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, - int_x86_fma_vfnmsub_sd>, VEX_LIG; +defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, + int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG; +defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, + int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a115ab4..81b4f81 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -366,7 +366,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // // SDI - SSE2 instructions with XD prefix. // SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. -// SSDI - SSE2 instructions with XS prefix. +// S2SI - SSE2 instructions with XS prefix. // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. // PDI - SSE2 instructions with TB and OpSize prefixes. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. 
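Stepping back from the FMA3 multiclass rewrite above: the 132/213/231 suffixes encode which sources feed the multiply and which the add, with $dst always tied to $src1. The patterns attached above use the 213 form, whose operand order matches (Op $src2, $src1, $src3). A sketch of the three orderings, with scalar doubles standing in for the vector lanes:

#include <cassert>

// form 132: dst = src1 * src3 + src2
static double fma132(double s1, double s2, double s3) { return s1 * s3 + s2; }
// form 213: dst = src2 * src1 + src3   (the form the patterns above select)
static double fma213(double s1, double s2, double s3) { return s2 * s1 + s3; }
// form 231: dst = src2 * src3 + src1
static double fma231(double s1, double s2, double s3) { return s2 * s3 + s1; }

int main() {
  // Distinct inputs make the three forms distinguishable:
  assert(fma132(2, 3, 5) == 13);  // 2*5 + 3
  assert(fma213(2, 3, 5) == 11);  // 3*2 + 5
  assert(fma231(2, 3, 5) == 17);  // 3*5 + 2
}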
@@ -379,10 +379,10 @@ class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; -class SSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> +class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>; -class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, +class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, @@ -397,6 +397,10 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, Requires<[HasAVX]>; +class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, + Requires<[HasAVX]>; class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index ec030dd..ee2d3c4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -29,6 +29,13 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; + +// Commutative and Associative FMIN and FMAX. 
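On the comment just below: the plain FMIN/FMAX nodes model the SSE min/max semantics, which are not commutative because a NaN operand makes the result depend on operand order; the new FMINC/FMAXC nodes can only stand in when that distinction does not matter. A sketch of the asymmetry:

#include <cassert>
#include <cmath>
#include <limits>

// SSE minss(a, b) is "a < b ? a : b": any comparison with NaN is false,
// so the second operand wins whenever a NaN is involved.
static double minss(double a, double b) { return a < b ? a : b; }

int main() {
  double nan = std::numeric_limits<double>::quiet_NaN();
  assert(minss(nan, 1.0) == 1.0);       // NaN first: result is the number
  assert(std::isnan(minss(1.0, nan)));  // NaN second: result is the NaN
}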
+def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
+                      [SDNPCommutative, SDNPAssociative]>;
+def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
+                      [SDNPCommutative, SDNPAssociative]>;
+
 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
                      [SDNPCommutative, SDNPAssociative]>;
 def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
@@ -73,14 +80,20 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL",
-                 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                       SDTCisOpSmallerThanOp<1, 0> ]>>;
 def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
-                 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
+                 SDTypeProfile<1, 1,
+                 [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
 def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext : SDNode<"X86ISD::VFPEXT",
+                SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                     SDTCisFP<0>, SDTCisFP<1>]>>;
+
 def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
 def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
 def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
@@ -125,7 +138,10 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
 def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
 def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
-SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+                SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+
+def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
+                SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
 def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
@@ -160,9 +176,26 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
-def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
-def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
-def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
+def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
+def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
+def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
+def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
+def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+
+def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+                                         SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
+                                         SDTCisVT<4, i8>]>;
+def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+                                         SDTCisVT<2, v16i8>, SDTCisVT<3, i32>,
+                                         SDTCisVT<4, v16i8>, SDTCisVT<5, i32>,
+                                         SDTCisVT<6, i8>]>;
+
+def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
+def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;

 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 69493bc..459f01a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -414,12 +414,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) {
   { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
   { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
   { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
-  { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
-  { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
-  { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
-  {
X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, @@ -680,6 +674,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL64rr, X86::IMUL64rm, 0 }, { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 }, { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, @@ -1130,8 +1130,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 }, { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 }, { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 }, - { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, 0 }, - { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, 0 }, + { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 }, + { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 }, { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 }, { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 }, @@ -1145,10 +1145,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 }, { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 }, { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 }, - { X86::VFMADDPSr132r_Int, X86::VFMADDPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDPDr132r_Int, X86::VFMADDPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDPSr132rY_Int, X86::VFMADDPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMADDPDr132rY_Int, X86::VFMADDPDr132mY_Int, TB_ALIGN_32 }, { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 }, { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 }, @@ -1156,8 +1152,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 }, { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 }, { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 }, - { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, 0 }, - { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, 0 }, + { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 }, + { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 }, { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 }, { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 }, @@ -1171,10 +1167,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 }, { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 }, { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 }, - { X86::VFNMADDPSr132r_Int, X86::VFNMADDPSr132m_Int, TB_ALIGN_16 }, - { X86::VFNMADDPDr132r_Int, X86::VFNMADDPDr132m_Int, TB_ALIGN_16 }, - { X86::VFNMADDPSr132rY_Int, X86::VFNMADDPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFNMADDPDr132rY_Int, X86::VFNMADDPDr132mY_Int, TB_ALIGN_32 }, { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 }, { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 }, @@ -1182,8 +1174,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 }, { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 }, { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 }, - { 
X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, 0 }, - { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, 0 }, + { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 }, + { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 }, { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 }, { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 }, @@ -1197,10 +1189,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFMSUBPSr132r_Int, X86::VFMSUBPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBPDr132r_Int, X86::VFMSUBPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBPSr132rY_Int, X86::VFMSUBPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMSUBPDr132rY_Int, X86::VFMSUBPDr132mY_Int, TB_ALIGN_32 }, { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 }, { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 }, @@ -1208,8 +1196,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 }, { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 }, { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 }, - { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, 0 }, - { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, 0 }, + { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 }, + { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 }, { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 }, { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 }, @@ -1223,10 +1211,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 }, { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 }, { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFNMSUBPSr132r_Int, X86::VFNMSUBPSr132m_Int, TB_ALIGN_16 }, - { X86::VFNMSUBPDr132r_Int, X86::VFNMSUBPDr132m_Int, TB_ALIGN_16 }, - { X86::VFNMSUBPSr132rY_Int, X86::VFNMSUBPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFNMSUBPDr132rY_Int, X86::VFNMSUBPDr132mY_Int, TB_ALIGN_32 }, { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 }, { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 }, @@ -1240,10 +1224,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 }, { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 }, { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFMADDSUBPSr132r_Int, X86::VFMADDSUBPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDSUBPDr132r_Int, X86::VFMADDSUBPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDSUBPSr132rY_Int, X86::VFMADDSUBPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMADDSUBPDr132rY_Int, X86::VFMADDSUBPDr132mY_Int, TB_ALIGN_32 }, { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 }, { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 }, @@ -1257,10 +1237,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 }, - { X86::VFMSUBADDPSr132r_Int, X86::VFMSUBADDPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBADDPDr132r_Int, X86::VFMSUBADDPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBADDPSr132rY_Int, X86::VFMSUBADDPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMSUBADDPDr132rY_Int, X86::VFMSUBADDPDr132mY_Int, TB_ALIGN_32 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { @@ -1318,8 +1294,7 @@ 
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); switch (MI.getOpcode()) { - default: - llvm_unreachable(0); + default: llvm_unreachable("Unreachable!"); case X86::MOVSX16rr8: case X86::MOVZX16rr8: case X86::MOVSX32rr8: @@ -1463,6 +1438,9 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, /// regIsPICBase - Return true if register is PIC base (i.e.g defined by /// X86::MOVPC32r. static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { + // Don't waste compile time scanning use-def chains of physregs. + if (!TargetRegisterInfo::isVirtualRegister(BaseReg)) + return false; bool isPICBase = false; for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), E = MRI.def_end(); I != E; ++I) { @@ -1480,78 +1458,69 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const { switch (MI->getOpcode()) { default: break; - case X86::MOV8rm: - case X86::MOV16rm: - case X86::MOV32rm: - case X86::MOV64rm: - case X86::LD_Fp64m: - case X86::MOVSSrm: - case X86::MOVSDrm: - case X86::MOVAPSrm: - case X86::MOVUPSrm: - case X86::MOVAPDrm: - case X86::MOVDQArm: - case X86::VMOVSSrm: - case X86::VMOVSDrm: - case X86::VMOVAPSrm: - case X86::VMOVUPSrm: - case X86::VMOVAPDrm: - case X86::VMOVDQArm: - case X86::VMOVAPSYrm: - case X86::VMOVUPSYrm: - case X86::VMOVAPDYrm: - case X86::VMOVDQAYrm: - case X86::MMX_MOVD64rm: - case X86::MMX_MOVQ64rm: - case X86::FsVMOVAPSrm: - case X86::FsVMOVAPDrm: - case X86::FsMOVAPSrm: - case X86::FsMOVAPDrm: { - // Loads from constant pools are trivially rematerializable. - if (MI->getOperand(1).isReg() && - MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - MI->isInvariantLoad(AA)) { - unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0 || BaseReg == X86::RIP) - return true; - // Allow re-materialization of PIC load. - if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) - return false; - const MachineFunction &MF = *MI->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - bool isPICBase = false; - for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), - E = MRI.def_end(); I != E; ++I) { - MachineInstr *DefMI = I.getOperand().getParent(); - if (DefMI->getOpcode() != X86::MOVPC32r) - return false; - assert(!isPICBase && "More than one PIC base?"); - isPICBase = true; - } - return isPICBase; - } - return false; + case X86::MOV8rm: + case X86::MOV16rm: + case X86::MOV32rm: + case X86::MOV64rm: + case X86::LD_Fp64m: + case X86::MOVSSrm: + case X86::MOVSDrm: + case X86::MOVAPSrm: + case X86::MOVUPSrm: + case X86::MOVAPDrm: + case X86::MOVDQArm: + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::VMOVAPSrm: + case X86::VMOVUPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::MMX_MOVD64rm: + case X86::MMX_MOVQ64rm: + case X86::FsVMOVAPSrm: + case X86::FsVMOVAPDrm: + case X86::FsMOVAPSrm: + case X86::FsMOVAPDrm: { + // Loads from constant pools are trivially rematerializable. + if (MI->getOperand(1).isReg() && + MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + MI->isInvariantLoad(AA)) { + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0 || BaseReg == X86::RIP) + return true; + // Allow re-materialization of PIC load. 
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) + return false; + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return regIsPICBase(BaseReg, MRI); } + return false; + } - case X86::LEA32r: - case X86::LEA64r: { - if (MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - !MI->getOperand(4).isReg()) { - // lea fi#, lea GV, etc. are all rematerializable. - if (!MI->getOperand(1).isReg()) - return true; - unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0) - return true; - // Allow re-materialization of lea PICBase + x. - const MachineFunction &MF = *MI->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - return regIsPICBase(BaseReg, MRI); - } - return false; - } + case X86::LEA32r: + case X86::LEA64r: { + if (MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + !MI->getOperand(4).isReg()) { + // lea fi#, lea GV, etc. are all rematerializable. + if (!MI->getOperand(1).isReg()) + return true; + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0) + return true; + // Allow re-materialization of lea PICBase + x. + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return regIsPICBase(BaseReg, MRI); + } + return false; + } } // All other instructions marked M_REMATERIALIZABLE are always trivially @@ -1660,7 +1629,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, case X86::MOV64r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { switch (Opc) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; @@ -1733,8 +1702,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); switch (MIOpc) { - default: - llvm_unreachable(0); + default: llvm_unreachable("Unreachable!"); case X86::SHL16ri: { unsigned ShAmt = MI->getOperand(2).getImm(); MIB.addReg(0).addImm(1 << ShAmt) @@ -2126,57 +2094,25 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MI->getOperand(3).setImm(Size-Amt); return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); } - case X86::CMOVB16rr: - case X86::CMOVB32rr: - case X86::CMOVB64rr: - case X86::CMOVAE16rr: - case X86::CMOVAE32rr: - case X86::CMOVAE64rr: - case X86::CMOVE16rr: - case X86::CMOVE32rr: - case X86::CMOVE64rr: - case X86::CMOVNE16rr: - case X86::CMOVNE32rr: - case X86::CMOVNE64rr: - case X86::CMOVBE16rr: - case X86::CMOVBE32rr: - case X86::CMOVBE64rr: - case X86::CMOVA16rr: - case X86::CMOVA32rr: - case X86::CMOVA64rr: - case X86::CMOVL16rr: - case X86::CMOVL32rr: - case X86::CMOVL64rr: - case X86::CMOVGE16rr: - case X86::CMOVGE32rr: - case X86::CMOVGE64rr: - case X86::CMOVLE16rr: - case X86::CMOVLE32rr: - case X86::CMOVLE64rr: - case X86::CMOVG16rr: - case X86::CMOVG32rr: - case X86::CMOVG64rr: - case X86::CMOVS16rr: - case X86::CMOVS32rr: - case X86::CMOVS64rr: - case X86::CMOVNS16rr: - case X86::CMOVNS32rr: - case X86::CMOVNS64rr: - case X86::CMOVP16rr: - case X86::CMOVP32rr: - case X86::CMOVP64rr: - case X86::CMOVNP16rr: - case X86::CMOVNP32rr: - case X86::CMOVNP64rr: - case X86::CMOVO16rr: - case X86::CMOVO32rr: - case X86::CMOVO64rr: - case X86::CMOVNO16rr: - case X86::CMOVNO32rr: - case X86::CMOVNO64rr: { - unsigned Opc = 0; + case 
X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: + case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: + case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: + case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: + case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr: + case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr: + case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr: + case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr: + case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr: + case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr: + case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr: + case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr: + case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr: + case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr: + case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr: + case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: { + unsigned Opc; switch (MI->getOpcode()) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; @@ -2408,7 +2344,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) { /// whether it has memory operand. static unsigned getSETFromCond(X86::CondCode CC, bool HasMemoryOperand) { - static const unsigned Opc[16][2] = { + static const uint16_t Opc[16][2] = { { X86::SETAr, X86::SETAm }, { X86::SETAEr, X86::SETAEm }, { X86::SETBr, X86::SETBm }, @@ -2435,7 +2371,7 @@ static unsigned getSETFromCond(X86::CondCode CC, /// register size in bytes, and operand type. static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, bool HasMemoryOperand) { - static const unsigned Opc[32][3] = { + static const uint16_t Opc[32][3] = { { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, @@ -2768,19 +2704,18 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(GR64) -> DestReg(VR64) if (X86::GR64RegClass.contains(DestReg)) { - if (X86::VR128RegClass.contains(SrcReg)) { + if (X86::VR128RegClass.contains(SrcReg)) // Copy from a VR128 register to a GR64 register. return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr; - } else if (X86::VR64RegClass.contains(SrcReg)) { + if (X86::VR64RegClass.contains(SrcReg)) // Copy from a VR64 register to a GR64 register. return X86::MOVSDto64rr; - } } else if (X86::GR64RegClass.contains(SrcReg)) { // Copy from a GR64 register to a VR128 register. if (X86::VR128RegClass.contains(DestReg)) return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr; // Copy from a GR64 register to a VR64 register. - else if (X86::VR64RegClass.contains(DestReg)) + if (X86::VR64RegClass.contains(DestReg)) return X86::MOV64toSDrr; } @@ -2788,12 +2723,12 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(GR32) -> DestReg(FR32) if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg)) - // Copy from a FR32 register to a GR32 register. - return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr; + // Copy from a FR32 register to a GR32 register. + return HasAVX ? 
X86::VMOVSS2DIrr : X86::MOVSS2DIrr; if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) - // Copy from a GR32 register to a FR32 register. - return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; + // Copy from a GR32 register to a FR32 register. + return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; return 0; } @@ -2804,7 +2739,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool KillSrc) const { // First deal with the normal symmetric copies. bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); - unsigned Opc = 0; + unsigned Opc; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; else if (X86::GR32RegClass.contains(DestReg, SrcReg)) @@ -2843,7 +2778,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return; - } else if (X86::GR32RegClass.contains(DestReg)) { + } + if (X86::GR32RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); return; @@ -2855,7 +2791,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF64)); return; - } else if (X86::GR32RegClass.contains(SrcReg)) { + } + if (X86::GR32RegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(X86::PUSH32r)) .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF32)); @@ -3037,6 +2974,37 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, CmpMask = ~0; CmpValue = MI->getOperand(1).getImm(); return true; + // A SUB can be used to perform comparison. + case X86::SUB64rm: + case X86::SUB32rm: + case X86::SUB16rm: + case X86::SUB8rm: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = 0; + return true; + case X86::SUB64rr: + case X86::SUB32rr: + case X86::SUB16rr: + case X86::SUB8rr: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = MI->getOperand(2).getReg(); + CmpMask = ~0; + CmpValue = 0; + return true; + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB8ri: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = MI->getOperand(2).getImm(); + return true; case X86::CMP64rr: case X86::CMP32rr: case X86::CMP16rr: @@ -3145,6 +3113,55 @@ bool X86InstrInfo:: optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const { + // Check whether we can replace SUB with CMP. + unsigned NewOpcode = 0; + switch (CmpInstr->getOpcode()) { + default: break; + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB8ri: + case X86::SUB64rm: + case X86::SUB32rm: + case X86::SUB16rm: + case X86::SUB8rm: + case X86::SUB64rr: + case X86::SUB32rr: + case X86::SUB16rr: + case X86::SUB8rr: { + if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) + return false; + // There is no use of the destination register, we can replace SUB with CMP. 
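On the rewrite the comment above introduces: SUB and CMP compute identical EFLAGS, differing only in whether the result register is written, so a SUB whose destination is otherwise dead can be demoted to a CMP. A flag-level sketch (ZF/SF/CF only, 32-bit):

#include <cassert>
#include <cstdint>

struct Flags { bool zf, sf, cf; };

static Flags flagsOfSub(uint32_t a, uint32_t b) {
  uint32_t r = a - b;
  return { r == 0, static_cast<int32_t>(r) < 0, a < b };
}

// SUB writes a result *and* the flags; CMP writes only the flags.
static uint32_t sub(uint32_t a, uint32_t b, Flags &f) { f = flagsOfSub(a, b); return a - b; }
static void     cmp(uint32_t a, uint32_t b, Flags &f) { f = flagsOfSub(a, b); }

int main() {
  Flags fs, fc;
  (void)sub(3, 7, fs);  // result unused: exactly the case the peephole targets
  cmp(3, 7, fc);
  assert(fs.zf == fc.zf && fs.sf == fc.sf && fs.cf == fc.cf);
}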
+ switch (CmpInstr->getOpcode()) { + default: llvm_unreachable("Unreachable!"); + case X86::SUB64rm: NewOpcode = X86::CMP64rm; break; + case X86::SUB32rm: NewOpcode = X86::CMP32rm; break; + case X86::SUB16rm: NewOpcode = X86::CMP16rm; break; + case X86::SUB8rm: NewOpcode = X86::CMP8rm; break; + case X86::SUB64rr: NewOpcode = X86::CMP64rr; break; + case X86::SUB32rr: NewOpcode = X86::CMP32rr; break; + case X86::SUB16rr: NewOpcode = X86::CMP16rr; break; + case X86::SUB8rr: NewOpcode = X86::CMP8rr; break; + case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break; + case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break; + case X86::SUB32ri: NewOpcode = X86::CMP32ri; break; + case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break; + case X86::SUB16ri: NewOpcode = X86::CMP16ri; break; + case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break; + case X86::SUB8ri: NewOpcode = X86::CMP8ri; break; + } + CmpInstr->setDesc(get(NewOpcode)); + CmpInstr->RemoveOperand(0); + // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. + if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm || + NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm) + return false; + } + } + // Get the unique definition of SrcReg. MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); if (!MI) return false; @@ -3221,12 +3238,15 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, MachineBasicBlock::iterator E = CmpInstr->getParent()->end(); for (++I; I != E; ++I) { const MachineInstr &Instr = *I; - if (Instr.modifiesRegister(X86::EFLAGS, TRI)) { + bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI); + bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI); + // We should check the usage if this instruction uses and updates EFLAGS. + if (!UseEFLAGS && ModifyEFLAGS) { // It is safe to remove CmpInstr if EFLAGS is updated again. IsSafe = true; break; } - if (!Instr.readsRegister(X86::EFLAGS, TRI)) + if (!UseEFLAGS && !ModifyEFLAGS) continue; // EFLAGS is used by this instruction. @@ -3281,7 +3301,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // instructions will be modified. OpsToUpdate.push_back(std::make_pair(&*I, NewOpc)); } - if (Instr.killsRegister(X86::EFLAGS, TRI)) { + if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) { + // It is safe to remove CmpInstr if EFLAGS is updated again or killed. IsSafe = true; break; } @@ -3319,6 +3340,81 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, return true; } +/// optimizeLoadInstr - Try to remove the load by folding it to a register +/// operand at the use. We fold the load instructions if load defines a virtual +/// register, the virtual register is used once in the same BB, and the +/// instructions in-between do not load or store, and have no side effects. +MachineInstr* X86InstrInfo:: +optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { + if (FoldAsLoadDefReg == 0) + return 0; + // To be conservative, if there exists another load, clear the load candidate. + if (MI->mayLoad()) { + FoldAsLoadDefReg = 0; + return 0; + } + + // Check whether we can move DefMI here. + DefMI = MRI->getVRegDef(FoldAsLoadDefReg); + assert(DefMI); + bool SawStore = false; + if (!DefMI->isSafeToMove(this, 0, SawStore)) + return 0; + + // We try to commute MI if possible. + unsigned IdxEnd = (MI->isCommutable()) ? 
2 : 1; + for (unsigned Idx = 0; Idx < IdxEnd; Idx++) { + // Collect information about virtual register operands of MI. + unsigned SrcOperandId = 0; + bool FoundSrcOperand = false; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg != FoldAsLoadDefReg) + continue; + // Do not fold if we have a subreg use or a def or multiple uses. + if (MO.getSubReg() || MO.isDef() || FoundSrcOperand) + return 0; + + SrcOperandId = i; + FoundSrcOperand = true; + } + if (!FoundSrcOperand) return 0; + + // Check whether we can fold the def into SrcOperandId. + SmallVector<unsigned, 8> Ops; + Ops.push_back(SrcOperandId); + MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI); + if (FoldMI) { + FoldAsLoadDefReg = 0; + return FoldMI; + } + + if (Idx == 1) { + // MI was changed but it didn't help, commute it back! + commuteInstruction(MI, false); + return 0; + } + + // Check whether we can commute MI and enable folding. + if (MI->isCommutable()) { + MachineInstr *NewMI = commuteInstruction(MI, false); + // Unable to commute. + if (!NewMI) return 0; + if (NewMI != MI) { + // New instruction. It doesn't need to be kept. + NewMI->eraseFromParent(); + return 0; + } + } + } + return 0; +} + /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr /// instruction with two undef reads of the register being defined. This is /// used for mapping: @@ -3477,6 +3573,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable1; } else if (i == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; + } else if (i == 3) { + OpcodeTablePtr = &RegOp2MemOpTable3; } // If table selected... @@ -3947,7 +4045,6 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, getUndefRegState(MO.isUndef())); } // Change CMP32ri r, 0 back to TEST32rr r, r, etc. - unsigned NewOpc = 0; switch (DataMI->getOpcode()) { default: break; case X86::CMP64ri32: @@ -3960,8 +4057,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, MachineOperand &MO0 = DataMI->getOperand(0); MachineOperand &MO1 = DataMI->getOperand(1); if (MO1.getImm() == 0) { + unsigned NewOpc; switch (DataMI->getOpcode()) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::CMP64ri8: case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; case X86::CMP32ri8: diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index ec9b2e6..b6f69af 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -387,6 +387,18 @@ public: unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; + /// optimizeLoadInstr - Try to remove the load by folding it to a register + /// operand at the use. We fold the load instructions if and only if the + /// def and use are in the same BB. We only look at one load and see + /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register + /// defined by the load we are trying to fold. DefMI returns the machine + /// instruction that defines FoldAsLoadDefReg, and the function returns + /// the machine instruction generated due to folding. 
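A toy model of the fold-with-commute loop implemented above: try to fold the load-defined register into the use; if it sits in a slot that cannot take a memory operand and the instruction is commutable, commute once and retry, falling back to the original form if folding still fails. Everything here (Inst, tryFold) is hypothetical scaffolding, not the MachineInstr API:

#include <cassert>
#include <optional>
#include <utility>

struct Inst { int op0, op1; bool commutable; };

// Hypothetical rule: only operand slot 1 of this toy instruction can be
// replaced by a memory reference (-1 marks the folded slot).
static std::optional<Inst> tryFold(Inst i, int reg) {
  if (i.op1 == reg) { i.op1 = -1; return i; }
  return std::nullopt;
}

static std::optional<Inst> foldWithCommute(Inst i, int loadReg) {
  if (auto folded = tryFold(i, loadReg))
    return folded;
  if (!i.commutable)
    return std::nullopt;
  std::swap(i.op0, i.op1);              // commute and retry once
  if (auto folded = tryFold(i, loadReg))
    return folded;
  return std::nullopt;                  // caller keeps the plain load
}

int main() {
  Inst add{/*op0=*/5, /*op1=*/7, /*commutable=*/true};
  assert(foldWithCommute(add, 5).has_value());    // folds only after commuting
  Inst shift{5, 7, /*commutable=*/false};
  assert(!foldWithCommute(shift, 5).has_value()); // non-commutable: no fold
}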
+ virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index e4edd36..c8f40bb 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -251,7 +251,7 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (iPTR 0))))))], IIC_MMX_MOVQ_RR>; -def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), +def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector @@ -259,7 +259,7 @@ def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), IIC_MMX_MOVQ_RR>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), +def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOVQ_RR>; @@ -554,20 +554,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), (int_x86_mmx_pmovmskb VR64:$src))]>; -// MMX to XMM for vector types -def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, - [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; - -def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; - -def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), - (v2i64 (MOVQI2PQIrm addr:$src))>; - -def : Pat<(v2i64 (MMX_X86movq2dq - (x86mmx (scalar_to_vector (loadi32 addr:$src))))), - (v2i64 (MOVDI2PDIrm addr:$src))>; - // Low word of XMM to MMX. def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index c2d169a..220c06d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -245,9 +245,9 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, // A vector extract of the first f32/f64 position is a subregister copy def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>; def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>; // A 128-bit subvector extract from the first 256-bit vector position // is a subregister copy that needs no instruction. @@ -283,14 +283,14 @@ def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)), // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; + (COPY_TO_REGCLASS FR32:$src, VR128)>; def : Pat<(v8f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; + (COPY_TO_REGCLASS FR32:$src, VR128)>; // Implicitly promote a 64-bit scalar to a vector. def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + (COPY_TO_REGCLASS FR64:$src, VR128)>; def : Pat<(v4f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + (COPY_TO_REGCLASS FR64:$src, VR128)>; // Bitcasts between 128-bit vector types. 
Return the original type since // no instruction is needed for the conversion @@ -562,59 +562,57 @@ let Predicates = [HasAVX] in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (VMOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (VMOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (v4f32 (V_SET0)), - (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>; + (VMOVSSrr (v4f32 (V_SET0)), + (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>; def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>; + (VMOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>; } let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; // MOVSDrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))), @@ -628,70 +626,68 @@ let Predicates = [HasAVX] in { sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Move low f64 and clear high bits. def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (v2f64 (V_SET0)), - (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>; + (VMOVSDrr (v2f64 (V_SET0)), + (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (v2i64 (V_SET0)), - (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>; + (VMOVSDrr (v2i64 (V_SET0)), + (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>; // Extract and store. 
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), - (VMOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>; def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), - (VMOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>; // Shuffle with VMOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), (VMOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>; def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), (VMOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>; // 256-bit variants def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss), - (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>; + (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)), + sub_xmm)>; def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss), - (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>; + (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)), + sub_xmm)>; // Shuffle with VMOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // 256-bit variants def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd), - (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>; + (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)), + sub_xmm)>; def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd), - (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>; + (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)), + sub_xmm)>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem @@ -699,17 +695,13 @@ let Predicates = [HasAVX] in { // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. 
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } let Predicates = [HasSSE1] in { @@ -719,37 +711,31 @@ let Predicates = [HasSSE1] in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; } let AddedComplexity = 20 in { - // MOVSSrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. + // MOVSSrm already zeros the high parts of the register. def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; } // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), - (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>; // Shuffle with MOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; } let Predicates = [HasSSE2] in { @@ -761,50 +747,46 @@ let Predicates = [HasSSE2] in { } let AddedComplexity = 20 in { - // MOVSDrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. + // MOVSDrm already zeros the high parts of the register. 
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; } // Extract and store. def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), - (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; // Shuffle with MOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. 
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } //===----------------------------------------------------------------------===// @@ -1416,14 +1398,15 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, - string asm, Domain d, OpndItins itins> { + X86MemOperand x86memop, string asm, Domain d, + OpndItins itins> { +let neverHasSideEffects = 1 in { def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (OpNode SrcRC:$src))], - itins.rr, d>; + [], itins.rr, d>; + let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], - itins.rm, d>; + [], itins.rm, d>; +} } multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1443,7 +1426,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, - "cvttss2si\t{$src, $dst|$dst, $src}", + "cvttss2si{q}\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, @@ -1451,7 +1434,7 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, - "cvttsd2si\t{$src, $dst|$dst, $src}", + "cvttsd2si{q}\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; @@ -1465,11 +1448,14 @@ defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, VEX_4V, VEX_W, VEX_LIG; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD, VEX_4V, VEX_LIG; -defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, - XD, VEX_4V, VEX_LIG; defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W, VEX_LIG; +def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", + (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>; +def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", + (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; + let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -1519,14 +1505,14 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, // and/or XMM operand(s). 
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + Intrinsic Int, Operand memop, ComplexPattern mem_cpat, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>; - def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; } multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -1548,30 +1534,31 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, itins.rm>; } -defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; +defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, + int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}", + SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si", - SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; + int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}", + SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; + sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, - f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", SSE_CVT_Scalar, 0>, XS, VEX_4V; defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", SSE_CVT_Scalar, 0>, XS, VEX_4V, VEX_W; defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", SSE_CVT_Scalar, 0>, XD, VEX_4V; defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, VEX_4V, VEX_W; @@ -1587,96 +1574,71 @@ let Constraints = "$src1 = $dst" in { "cvtsi2sd", SSE_CVT_Scalar>, XD; defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, - "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W; + "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; } /// SSE 1 Only // Aliases for intrinsics defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si", + ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS, VEX; defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si", SSE_CVT_SS2SI_64>, - XS, VEX, VEX_W; + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si{q}", SSE_CVT_SS2SI_64>, + XS, VEX, VEX_W; defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>, - XD, VEX; + sdmem, sse_load_f64, "cvttsd2si", + SSE_CVT_SD2SI>, XD, VEX; defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse2_cvttsd2si64, f128mem, 
load, - "cvttsd2si", SSE_CVT_SD2SI>, - XD, VEX, VEX_W; + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si{q}", SSE_CVT_SD2SI>, + XD, VEX, VEX_W; defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si", + ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS; defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si{q}", SSE_CVT_SS2SI_64>, - XS, REX_W; + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W; defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>, - XD; + sdmem, sse_load_f64, "cvttsd2si", + SSE_CVT_SD2SI>, XD; defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttsd2si{q}", SSE_CVT_SD2SI>, - XD, REX_W; - -let Pattern = []<dag>, neverHasSideEffects = 1 in { -defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, - "cvtss2si{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; -defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, - "cvtss2si\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; -defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + +defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + ssmem, sse_load_f32, "cvtss2si{l}", + SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; +defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, + ssmem, sse_load_f32, "cvtss2si{q}", + SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; + +defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + ssmem, sse_load_f32, "cvtss2si{l}", + SSE_CVT_SS2SI_32>, XS; +defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, + ssmem, sse_load_f32, "cvtss2si{q}", + SSE_CVT_SS2SI_64>, XS, REX_W; + +defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, "vcvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX, - Requires<[HasAVX]>; -defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, + SSEPackedSingle, SSE_CVT_PS>, + TB, VEX, Requires<[HasAVX]>; +defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem, "vcvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX, - Requires<[HasAVX]>; -} - -let Pattern = []<dag>, neverHasSideEffects = 1 in { -defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, - "cvtss2si{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_32>, XS; -defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/, - "cvtss2si{q}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_64>, XS, REX_W; -defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, - Requires<[HasSSE2]>; -} + SSEPackedSingle, SSE_CVT_PS>, + TB, VEX, Requires<[HasAVX]>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse_cvtss2si VR128:$src), - (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (VCVTSS2SIrm addr:$src)>; - def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), - (VCVTSS2SI64rm addr:$src)>; -} - -let Predicates = [HasSSE1] in { - def 
: Pat<(int_x86_sse_cvtss2si VR128:$src), - (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (CVTSS2SIrm addr:$src)>; - def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), - (CVTSS2SI64rm addr:$src)>; -} +defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, + TB, Requires<[HasSSE2]>; /// SSE 2 Only // Convert scalar double to scalar single +let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], @@ -1687,6 +1649,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; +} def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, Requires<[HasAVX]>; @@ -1702,17 +1665,37 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), XD, Requires<[HasSSE2, OptForSize]>; -defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", - SSE_CVT_Scalar, 0>, - XS, VEX_4V; -let Constraints = "$src1 = $dst" in -defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", - SSE_CVT_Scalar>, XS; +def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; +def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtsd2ss + VR128:$src1, sse_load_f64:$src2))], + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + +let Constraints = "$src1 = $dst" in { +def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>; +def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtsd2ss + VR128:$src1, sse_load_f64:$src2))], + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>; +} // Convert scalar single to scalar double // SSE2 instructions with XS prefix +let neverHasSideEffects = 1 in { def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1724,19 +1707,21 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; +} -let Predicates = [HasAVX] in { +let AddedComplexity = 1 in { // give AVX priority def : Pat<(f64 (fextend FR32:$src)), - (VCVTSS2SDrr FR32:$src, FR32:$src)>; + (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)),
addr:$src)>; -} + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>, - Requires<[HasAVX, OptForSpeed]>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, + Requires<[HasAVX, OptForSpeed]>; +} // AddedComplexity = 1 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -1762,67 +1747,60 @@ def : Pat<(extloadf32 addr:$src), def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, - Requires<[HasAVX]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, - Requires<[HasAVX]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[HasSSE2]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))], - IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[HasSSE2]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>; } // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2dq_256 VR256:$src))], IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def CVTPS2DQrr : PDI<0x5B, 
MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], IIC_SSE_CVT_PS_RR>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), - (VCVTPS2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), - (VCVTPS2DQrm addr:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), - (CVTPS2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), - (CVTPS2DQrm addr:$src)>; -} // Convert Packed Double FP to Packed DW Integers let Predicates = [HasAVX] in { @@ -1830,77 +1808,74 @@ let Predicates = [HasAVX] in { // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + VEX; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", (VCVTPD2DQrr VR128:$dst, VR128:$src)>; def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, + VEX, VEX_L; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; } def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), - (VCVTPD2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), - (VCVTPD2DQXrm addr:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), - (CVTPD2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), - (CVTPD2DQrm addr:$src)>; -} - // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix -def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; 
-def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; -def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), +def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, - (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX; -def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>, VEX; +def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 - (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; - -def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; -def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + [(set VR128:$dst, (int_x86_sse2_cvttps2dq + (memopv4f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; +def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], + IIC_SSE_CVT_PS_RR>, VEX; +def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 + (memopv8f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; + +def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>; +def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -1952,16 +1927,6 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), (int_x86_sse2_cvttpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; -def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; - // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
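(Aside: a minimal sketch of the semantics the cvttps2dq patterns above select, written with compiler intrinsics as an illustrative assumption rather than anything from this patch. The "t" variant truncates toward zero, which is why it matches C-style float-to-int casts.)

#include <emmintrin.h>

// cvttps2dq: convert four packed floats to 32-bit ints, truncating
// toward zero, e.g. {1.9f, -1.9f, 0.5f, -0.5f} -> {1, -1, 0, 0}.
__m128i trunc_ps_to_epi32(__m128 v) {
  return _mm_cvttps_epi32(v);
}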
@@ -1977,10 +1942,14 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], IIC_SSE_CVT_PD_RR>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; @@ -1992,82 +1961,82 @@ let Predicates = [HasAVX] in { (VCVTTPD2DQYrm addr:$src)>; } // Predicates = [HasAVX] +def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], + IIC_SSE_CVT_PD_RR>; +def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memopv2f64 addr:$src)))], + IIC_SSE_CVT_PD_RM>; + // Convert packed single to packed double let Predicates = [HasAVX] in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; +let neverHasSideEffects = 1, mayLoad = 1 in def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], IIC_SSE_CVT_PD_RM>, TB, VEX; } let Predicates = [HasSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", [], + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB; +let neverHasSideEffects = 1, mayLoad = 1 in def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB; } -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), - (VCVTPS2PDrr VR128:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), - (CVTPS2PDrr VR128:$src)>; -} - // Convert Packed DW Integers to Packed Double FP let Predicates = [HasAVX] in { -def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs 
VR256:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +let neverHasSideEffects = 1, mayLoad = 1 in +def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + []>, VEX; +def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; +def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtdq2_pd_256 + (bitconvert (memopv2i64 addr:$src))))]>, VEX; +def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX; } -def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), +let neverHasSideEffects = 1, mayLoad = 1 in +def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>; -def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", [], +def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], IIC_SSE_CVT_PD_RM>; -// 128 bit register conversion intrinsics -let Predicates = [HasAVX] in -def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), - (VCVTDQ2PDrr VR128:$src)>; - -let Predicates = [HasSSE2] in -def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), - (CVTDQ2PDrr VR128:$src)>; - // AVX 256-bit register conversion intrinsics let Predicates = [HasAVX] in { - def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), - (VCVTDQ2PDYrr VR128:$src)>; - def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), - (VCVTDQ2PDYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), - (VCVTPD2DQYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTPD2DQYrm addr:$src)>; - def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), (VCVTDQ2PDYrr VR128:$src)>; def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), @@ -2079,48 +2048,44 @@ let Predicates = [HasAVX] in { // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", (VCVTPD2PSrr VR128:$dst, VR128:$src)>; def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", [], + "cvtpd2psx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], IIC_SSE_CVT_PD_RR>, VEX; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], IIC_SSE_CVT_PD_RR>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), - (VCVTPD2PSrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), - (VCVTPD2PSXrm addr:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), - (CVTPD2PSrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), - (CVTPD2PSrm addr:$src)>; -} - // AVX 256-bit register conversion intrinsics // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below // whenever possible to avoid declaring two versions of each one. 
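(Aside: an illustrative sketch of what cvtpd2ps computes, using compiler intrinsics that are an assumption here, not part of the patch. Only two doubles fit in an xmm register, so the narrowed result occupies the low 64 bits and the instruction zeroes the rest, matching the VR128 result above.)

#include <emmintrin.h>

// cvtpd2ps: round two doubles to floats in the low half of the result;
// the upper 64 bits of the destination register are zeroed.
__m128 narrow_pd_to_ps(__m128d v) {
  return _mm_cvtpd_ps(v);
}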
@@ -2130,38 +2095,26 @@ let Predicates = [HasAVX] in { def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), (VCVTDQ2PSYrm addr:$src)>; - def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), - (VCVTPD2PSYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), - (VCVTPD2PSYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), - (VCVTPS2DQYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), - (VCVTPS2DQYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), - (VCVTPS2PDYrr VR128:$src)>; - def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), - (VCVTPS2PDYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), - (VCVTTPD2DQYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTTPD2DQYrm addr:$src)>; - // Match fround and fextend for 128/256-bit conversions def : Pat<(v4f32 (fround (v4f64 VR256:$src))), (VCVTPD2PSYrr VR256:$src)>; def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), (VCVTPD2PSYrm addr:$src)>; + def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), + (VCVTPS2PDrr VR128:$src)>; def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), (VCVTPS2PDYrr VR128:$src)>; def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), (VCVTPS2PDYrm addr:$src)>; } +let Predicates = [HasSSE2] in { + // Match fextend for 128-bit conversions + def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), + (CVTPS2PDrr VR128:$src)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// @@ -2593,17 +2546,13 @@ let Predicates = [HasAVX] in { OpSize, VEX; def : Pat<(i32 (X86fgetsign FR32:$src)), - (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>; + (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR32:$src)), - (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>; + (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>; + (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR64:$src)), - (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>; + (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>; // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), @@ -2628,17 +2577,17 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", SSEPackedDouble>, TB, OpSize; def : Pat<(i32 (X86fgetsign FR32:$src)), - (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; + (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>, + Requires<[HasSSE1]>; def : Pat<(i64 (X86fgetsign FR32:$src)), - (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; + (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>, + Requires<[HasSSE1]>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; + (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>, + Requires<[HasSSE2]>; def : Pat<(i64 (X86fgetsign FR64:$src)), - (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; + (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>, + Requires<[HasSSE2]>;
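(Aside: why a plain COPY_TO_REGCLASS suffices for the X86fgetsign patterns above, shown as a hedged user-level sketch with intrinsics that are an assumption, not part of the patch. MOVMSKPS reads only the per-lane sign bits, so a scalar FR32 value can simply be retagged as VR128 and lane 0's sign extracted.)

#include <xmmintrin.h>

// movmskps gathers the sign bit of each of the four float lanes;
// with a scalar placed in lane 0, bit 0 of the mask is that float's sign.
int float_sign_bit(float f) {
  return _mm_movemask_ps(_mm_set_ss(f)) & 1;
}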
//===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions @@ -2923,7 +2872,8 @@ let isCommutable = 0 in { basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V; + basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, + VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>, VEX_4V, VEX_LIG; @@ -2974,6 +2924,23 @@ let Constraints = "$src1 = $dst" in { } } +let isCommutable = 1, isCodeGenOnly = 1 in { + defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>, + VEX_4V, VEX_LIG; + defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V; + defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>, + VEX_4V, VEX_LIG; + defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>, + basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V; + let Constraints = "$src1 = $dst" in { + defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>; + defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>; + } +} + /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the @@ -3236,34 +3203,30 @@ def : Pat<(f32 (X86frcp (load addr:$src))), let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(int_x86_sse_sqrt_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VSQRTSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src), (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; def : Pat<(int_x86_sse2_sqrt_sd VR128:$src), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), - (VSQRTSDr (f64 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)), - sub_sd)>; + (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR64)), + VR128)>; def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src), (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VRSQRTSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src), (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; def : Pat<(int_x86_sse_rcp_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VRCPSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src), (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; } @@ -4609,7 +4572,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), // 
Bitcast FR64 <-> GR64 // let Predicates = [HasAVX] in -def VMOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, VEX; @@ -4622,7 +4585,7 @@ def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), [(store (i64 (bitconvert FR64:$src)), addr:$dst)], IIC_SSE_MOVDQ>, VEX; -def MOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], IIC_SSE_MOVDQ>; @@ -5505,16 +5468,14 @@ let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, Requires<[HasSSE3]>; -def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), - [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>, - Requires<[HasSSE3]>; } let Uses = [EAX, ECX, EDX] in def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>, TB, Requires<[HasSSE3]>; let Uses = [ECX, EAX] in -def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>, +def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", + [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; @@ -6906,81 +6867,42 @@ let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { } // Packed Compare Implicit Length Strings, Return Index -let Defs = [ECX, EFLAGS] in { - multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> { +let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpistri<string asm> { def rr : SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x63, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">, - VEX; -defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">, - VEX; -defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">, - VEX; -defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">, - VEX; -defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">, - VEX; -defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">, - VEX; -} - -defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; -defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; -defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; -defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; -defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; -defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; +let Predicates = [HasAVX] in +defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; +defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; // Packed Compare 
Explicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { - multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> { +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpestri<string asm> { def rr : SS42AI<0x61, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, - (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">, - VEX; -defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">, - VEX; -defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">, - VEX; -defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">, - VEX; -defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">, - VEX; -defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">, - VEX; -} - -defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; -defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; -defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; -defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; -defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; -defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; +let Predicates = [HasAVX] in +defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; +defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; //===----------------------------------------------------------------------===// // SSE4.2 - CRC Instructions @@ -7727,24 +7649,18 @@ let Predicates = [HasAVX2] in { // is used by additional users, which prevents the pattern selection. 
let AddedComplexity = 20 in { def : Pat<(v4f32 (X86VBroadcast FR32:$src)), - (VBROADCASTSSrr - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>; + (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(v8f32 (X86VBroadcast FR32:$src)), - (VBROADCASTSSYrr - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>; + (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(v4f64 (X86VBroadcast FR64:$src)), - (VBROADCASTSDYrr - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>; + (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VBROADCASTSSrr - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>; + (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), - (VBROADCASTSSYrr - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>; + (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), - (VBROADCASTSDYrr - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>; + (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>; } } @@ -7768,46 +7684,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), let AddedComplexity = 20 in { // 128bit broadcasts: def : Pat<(v4f32 (X86VBroadcast FR32:$src)), - (VPSHUFDri - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>; + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>; def : Pat<(v8f32 (X86VBroadcast FR32:$src)), (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), - 0), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>; def : Pat<(v4f64 (X86VBroadcast FR64:$src)), (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), - 0x44), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), - 0x44), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VPSHUFDri - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0)>; + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), - 0), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd), - 0x44), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd), - 0x44), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>; } } @@ -8052,7 +7948,7 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256, []>, VEX_4VOp3, VEX_L; } -let Constraints = "$src1 = $dst, $mask = $mask_wb" in { +let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in { defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, 
vx64mem, vx64mem>, VEX_W; defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 0168d12..7ac4cec 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -532,6 +532,15 @@ uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) { #endif } +template<typename T> void addUnaligned(void *Pos, T Delta) { + T Value; + std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos), + sizeof(T)); + Value += Delta; + std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value), + sizeof(T)); +} + /// relocate - Before the JIT can run a block of code that has been emitted, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. @@ -545,24 +554,24 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR, // PC relative relocation, add the relocated value to the value already in // memory, after we adjust it for where the PC is. ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal(); - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; } case X86::reloc_picrel_word: { // PIC base relative relocation, add the relocated value to the value // already in memory, after we adjust it for where the PIC base is. ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal()); - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; } case X86::reloc_absolute_word: case X86::reloc_absolute_word_sext: // Absolute relocation, just add the relocated value to the value already // in memory. - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; case X86::reloc_absolute_dword: - *((intptr_t*)RelocPos) += ResultPtr; + addUnaligned<intptr_t>(RelocPos, ResultPtr); break; } } diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index c76d3cc..d7c08df 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -65,7 +65,7 @@ namespace llvm { /// referenced global symbols. virtual void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs, unsigned char* GOTBase); - + /// allocateThreadLocalMemory - Each target has its own way of /// handling thread local variables. This method returns a value only /// meaningful to the target. 
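The addUnaligned() helper added to X86JITInfo.cpp above exists because relocation targets inside emitted machine code are byte-addressed: casting RelocPos to unsigned* and dereferencing it, as the old code did, is undefined behavior for unaligned addresses and faults on strict-alignment hosts. Below is a minimal standalone sketch of the same memcpy-based read-modify-write idiom; addUnalignedSketch and the toy buffer are illustrative stand-ins written for this note, not part of the patch.

#include <cstdint>
#include <cstring>

// Read-modify-write a value at a possibly unaligned address. memcpy of the
// bytes is the portable way to do this; compilers lower it to plain loads
// and stores on targets that allow unaligned access.
template <typename T>
void addUnalignedSketch(void *Pos, T Delta) {
  T Value;
  std::memcpy(&Value, Pos, sizeof(T)); // unaligned load
  Value += Delta;
  std::memcpy(Pos, &Value, sizeof(T)); // unaligned store
}

int main() {
  unsigned char Code[8] = {0};   // pretend this is emitted code
  void *RelocPos = Code + 1;     // deliberately misaligned relocation target
  addUnalignedSketch<std::uint32_t>(RelocPos, 0x1000);

  std::uint32_t Check;
  std::memcpy(&Check, RelocPos, sizeof(Check));
  return Check == 0x1000 ? 0 : 1; // 0 on success
}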
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index df7507c..9c0ce4e 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -46,12 +46,12 @@ GetSymbolFromOperand(const MachineOperand &MO) const { assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference"); SmallString<128> Name; - + if (!MO.isGlobal()) { assert(MO.isSymbol()); Name += MAI.getGlobalPrefix(); Name += MO.getSymbolName(); - } else { + } else { const GlobalValue *GV = MO.getGlobal(); bool isImplicitlyPrivate = false; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || @@ -59,7 +59,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) isImplicitlyPrivate = true; - + Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); } @@ -110,7 +110,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { getMachOMMI().getFnStubEntry(Sym); if (StubSym.getPointer()) return Sym; - + if (MO.isGlobal()) { StubSym = MachineModuleInfoImpl:: @@ -135,7 +135,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, // lot of extra uniquing. const MCExpr *Expr = 0; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - + switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); case X86II::MO_NO_FLAG: // No flag. @@ -144,7 +144,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DLLIMPORT: case X86II::MO_DARWIN_STUB: break; - + case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; case X86II::MO_TLVP_PIC_BASE: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); @@ -173,7 +173,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: Expr = MCSymbolRefExpr::Create(Sym, Ctx); // Subtract the pic base. - Expr = MCBinaryExpr::CreateSub(Expr, + Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), Ctx); if (MO.isJTI() && MAI.hasSetDirective()) { @@ -187,10 +187,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, } break; } - + if (Expr == 0) Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); - + if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), @@ -211,10 +211,10 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { // Convert registers in the addr mode according to subreg64. for (unsigned i = 0; i != 4; ++i) { if (!MI->getOperand(OpNo+i).isReg()) continue; - + unsigned Reg = MI->getOperand(OpNo+i).getReg(); if (Reg == 0) continue; - + MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); } } @@ -280,7 +280,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, return; // Check whether this is an absolute address. - // FIXME: We know TLVP symbol refs aren't, but there should be a better way + // FIXME: We know TLVP symbol refs aren't, but there should be a better way // to do this here. 
bool Absolute = true; if (Inst.getOperand(AddrOp).isExpr()) { @@ -289,7 +289,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP) Absolute = false; } - + if (Absolute && (Inst.getOperand(AddrBase + 0).getReg() != 0 || Inst.getOperand(AddrBase + 2).getReg() != 0 || @@ -306,10 +306,10 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - + MCOperand MCOp; switch (MO.getType()) { default: @@ -345,10 +345,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { // Ignore call clobbers. continue; } - + OutMI.addOperand(MCOp); } - + // Handle a few special cases to eliminate operand modifiers. ReSimplify: switch (OutMI.getOpcode()) { @@ -425,7 +425,7 @@ ReSimplify: case X86::TAILJMPd: case X86::TAILJMPd64: Opcode = X86::JMP_1; break; } - + MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); OutMI.setOpcode(Opcode); @@ -445,7 +445,7 @@ ReSimplify: case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify; case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify; case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify; - + // The assembler backend wants to see branches in their small form and relax // them to their large form. The JIT can only handle the large form because // it does not do relaxation. For now, translate the large form to the @@ -688,7 +688,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // call "L1$pb" // "L1$pb": // popl %esi - + // Emit the call. MCSymbol *PICBase = MF->getPICBaseSymbol(); TmpInst.setOpcode(X86::CALLpcrel32); @@ -697,43 +697,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); OutStreamer.EmitInstruction(TmpInst); - + // Emit the label. OutStreamer.EmitLabel(PICBase); - + // popl $reg TmpInst.setOpcode(X86::POP32r); TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg()); OutStreamer.EmitInstruction(TmpInst); return; } - + case X86::ADD32ri: { // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) break; - + // Okay, we have something like: // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) - + // For this, we want to print something like: // MYGLOBAL + (. - PICBASE) // However, we can't generate a ".", so just emit a new label here and refer // to it. MCSymbol *DotSym = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(DotSym); - + // Now that we have emitted the label, lower the complex operand expression. 
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); - + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); const MCExpr *PICBase = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext); DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); - - DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), + + DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), DotExpr, OutContext); - + MCInst TmpInst; TmpInst.setOpcode(X86::ADD32ri); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); @@ -743,7 +743,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } } - + MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); OutStreamer.EmitInstruction(TmpInst); diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h index 40df3db..b4d4cfd 100644 --- a/lib/Target/X86/X86MCInstLower.h +++ b/lib/Target/X86/X86MCInstLower.h @@ -25,7 +25,7 @@ namespace llvm { class Mangler; class TargetMachine; class X86AsmPrinter; - + /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; @@ -37,12 +37,12 @@ class LLVM_LIBRARY_VISIBILITY X86MCInstLower { public: X86MCInstLower(Mangler *mang, const MachineFunction &MF, X86AsmPrinter &asmprinter); - + void Lower(const MachineInstr *MI, MCInst &OutMI) const; MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - + private: MachineModuleInfoMachO &getMachOMMI() const; }; diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index f83a525..78d20ce 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -24,7 +24,7 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { virtual void anchor(); /// ForceFramePointer - True if the function is required to use of frame - /// pointer for reasons other than it containing dynamic allocation or + /// pointer for reasons other than it containing dynamic allocation or /// that FP eliminatation is turned off. For example, Cygwin main function /// contains stack pointer re-alignment code which requires FP. bool ForceFramePointer; @@ -83,7 +83,7 @@ public: VarArgsFPOffset(0), ArgumentStackSize(0), NumLocalDynamics(0) {} - + explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), CalleeSavedFrameSize(0), @@ -99,7 +99,7 @@ public: ArgumentStackSize(0), NumLocalDynamics(0) {} - bool getForceFramePointer() const { return ForceFramePointer;} + bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index acf53f8..877b8f6 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -72,13 +72,15 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, SlotSize = 8; StackPtr = X86::RSP; FramePtr = X86::RBP; - BasePtr = X86::RBX; } else { SlotSize = 4; StackPtr = X86::ESP; FramePtr = X86::EBP; - BasePtr = X86::EBX; } + // Use a callee-saved register as the base pointer. These registers must + // not conflict with any ABI requirements. 
For example, in 32-bit mode PIC
+ // code expects the GOT address to be in EBX at calls through the PLT.
+ BasePtr = Is64Bit ? X86::RBX : X86::ESI;
 }

 /// getCompactUnwindRegNum - This function maps the register to the number for
@@ -366,7 +368,7 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
 if (!EnableBasePointer)
 return false;

- // When we need stack realignment and there are dynamic allocas, we can't 
+ // When we need stack realignment and there are dynamic allocas, we can't
 // reference off of the stack pointer, so we reserve a base pointer.
 if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
 return true;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index ae2d4d0..edc7184 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -23,9 +23,6 @@ let Namespace = "X86" in {
 def sub_8bit_hi : SubRegIndex;
 def sub_16bit : SubRegIndex;
 def sub_32bit : SubRegIndex;
-
- def sub_ss : SubRegIndex;
- def sub_sd : SubRegIndex;

 def sub_xmm : SubRegIndex;

@@ -163,8 +160,6 @@ let Namespace = "X86" in {
 def FP6 : Register<"fp6">;

 // XMM Registers, used by the various SSE instruction set extensions.
- // The sub_ss and sub_sd subregs are the same registers with another regclass.
- let CompositeIndices = [(sub_ss), (sub_sd)] in {
 def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
 def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
 def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
@@ -184,7 +179,7 @@ let Namespace = "X86" in {
 def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
 def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
 def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
- }}
+ } // CostPerUse

 // YMM Registers, used by AVX instructions
 let SubRegIndices = [sub_xmm] in {
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
index 857becf..0333056 100644
--- a/lib/Target/X86/X86Relocations.h
+++ b/lib/Target/X86/X86Relocations.h
@@ -21,7 +21,7 @@ namespace llvm {
 /// RelocationType - An enum for the x86 relocation codes. Note that
 /// the terminology here doesn't follow x86 convention - word means
 /// 32-bit and dword means 64-bit. The relocations will be treated
- /// by JIT or ObjectCode emitters, this is transparent to the x86 code 
+ /// by JIT or ObjectCode emitters, this is transparent to the x86 code
 /// emitter but JIT and ObjectCode will treat them differently
 enum RelocationType {
 /// reloc_pcrel_word - PC relative relocation, add the relocated value to
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 7c6788f..00edcbc 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -38,7 +38,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
 // If to a segment-relative address space, use the default lowering.
 if (DstPtrInfo.getAddrSpace() >= 256)
 return SDValue();
-
+
 // If not DWORD aligned or size is more than the threshold, call the library.
 // The libc version is likely to be faster for these cases. It can use the
 // address value and run time information about the CPU.
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e6e9c56..9087852 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -39,10 +39,10 @@ unsigned char X86Subtarget::
ClassifyBlockAddressReference() const {
 if (isPICStyleGOT()) // 32-bit ELF targets.
return X86II::MO_GOTOFF; - + if (isPICStyleStubPIC()) // Darwin/32 in PIC mode. return X86II::MO_PIC_BASE_OFFSET; - + // Direct static reference to label. return X86II::MO_NO_FLAG; } @@ -69,7 +69,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Large model never uses stubs. if (TM.getCodeModel() == CodeModel::Large) return X86II::MO_NO_FLAG; - + if (isTargetDarwin()) { // If symbol visibility is hidden, the extra load is not needed if // target is x86-64 or the symbol is definitely defined in the current @@ -87,18 +87,18 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { return X86II::MO_NO_FLAG; } - + if (isPICStyleGOT()) { // 32-bit ELF targets. // Extra load is needed for all externally visible. if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) return X86II::MO_GOTOFF; return X86II::MO_GOT; } - + if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode. // Determine whether we have a stub reference and/or whether the reference // is relative to the PIC base or not. - + // If this is a strong reference to a definition, it is definitely not // through a stub. if (!isDecl && !GV->isWeakForLinker()) @@ -108,26 +108,26 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // normal $non_lazy_ptr stub because this symbol might be resolved late. if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_NONLAZY_PIC_BASE; - + // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. if (isDecl || GV->hasCommonLinkage()) { // Hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; } - + // Otherwise, no stub. return X86II::MO_PIC_BASE_OFFSET; } - + if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode. // Determine whether we have a stub reference. - + // If this is a strong reference to a definition, it is definitely not // through a stub. if (!isDecl && !GV->isWeakForLinker()) return X86II::MO_NO_FLAG; - + // Unless we have a symbol with hidden visibility, we have to go through a // normal $non_lazy_ptr stub because this symbol might be resolved late. if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. @@ -136,7 +136,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Otherwise, no stub. return X86II::MO_NO_FLAG; } - + // Direct static reference to global. return X86II::MO_NO_FLAG; } @@ -246,8 +246,11 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } // If it's Nehalem, unaligned memory access is fast. - // FIXME: Nehalem is family 6. Also include Westmere and later processors? - if (Family == 15 && Model == 26) { + // Include Westmere and Sandy Bridge as well. + // FIXME: add later processors. 
+ if (IsIntel && ((Family == 6 && Model == 26) ||
+ (Family == 6 && Model == 44) ||
+ (Family == 6 && Model == 42))) {
 IsUAMemFast = true;
 ToggleFeature(X86::FeatureFastUAMem);
 }
@@ -315,7 +318,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
 }

 X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, 
+ const std::string &FS,
 unsigned StackAlignOverride, bool is64Bit)
 : X86GenSubtargetInfo(TT, CPU, FS)
 , X86ProcFamily(Others)
@@ -397,10 +400,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
 }
 }

- if (X86ProcFamily == IntelAtom) {
+ if (X86ProcFamily == IntelAtom)
 PostRAScheduler = true;
- InstrItins = getInstrItineraryForCPU(CPUName);
- }
+
+ InstrItins = getInstrItineraryForCPU(CPUName);

 // It's important to keep the MCSubtargetInfo feature bits in sync with
 // target data structure which is shared with MC code emitter, etc.
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 1af585f..6841c5b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -55,7 +55,7 @@ protected:
 /// X86ProcFamily - X86 processor family: Intel Atom, and others
 X86ProcFamilyEnum X86ProcFamily;
-
+
 /// PICStyle - Which PIC style to use
 ///
 PICStyles::Style PICStyle;
@@ -149,7 +149,7 @@ protected:
 /// TargetTriple - What processor and OS we're targeting.
 Triple TargetTriple;
-
+
 /// Instruction itineraries for scheduling
 InstrItineraryData InstrItins;
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index e4f567f..80b75dc 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -222,7 +222,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
 DebugLoc dl = I->getDebugLoc();
 bool isControlFlow = MI->isCall() || MI->isReturn();

- // Shortcut: don't need to check regular instructions in dirty state. 
+ // Shortcut: don't need to check regular instructions in dirty state.
 if (!isControlFlow && CurState == ST_DIRTY)
 continue;
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 3dbc3b9..a4e5647 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -371,8 +371,3 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 false));
 }
 }
-
-void XCoreFrameLowering::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-
-}
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index afa2773..db1bbb6 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -44,8 +44,6 @@ namespace llvm {
 void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 RegScavenger *RS = NULL) const;

- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
 //! Stack slot size (4 bytes)
 static int stackSlotSize() {
 return 4;
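The Family/Model pairs tested in AutoDetectSubtargetFeatures() above are CPUID display values: Nehalem is family 6, model 26 (0x1A), Westmere model 44 (0x2C), Sandy Bridge model 42 (0x2A). On family 6 parts the 4-bit base model from CPUID leaf 1 must be combined with the extended-model field before comparisons like these are meaningful. A hedged sketch of that decoding follows; decodeSignature is an illustrative helper written for this note, not code from the patch.

#include <cstdint>

struct CpuSignature { unsigned Family, Model; };

// Decode the family/model "display" values from the EAX result of CPUID
// leaf 1, per the Intel SDM: bits 11:8 base family, 7:4 base model,
// 19:16 extended model, 27:20 extended family.
CpuSignature decodeSignature(std::uint32_t EAX) {
  unsigned Family = (EAX >> 8) & 0xf;
  unsigned Model = (EAX >> 4) & 0xf;
  if (Family == 6 || Family == 15) {
    if (Family == 15)
      Family += (EAX >> 20) & 0xff;    // extended family is additive
    Model += ((EAX >> 16) & 0xf) << 4; // extended model is the high nibble
  }
  return CpuSignature{Family, Model};
}

int main() {
  // 0x000106A5 is a typical Nehalem (Core i7) signature:
  // family 6, model 0x1A == 26, which the code above now matches.
  CpuSignature S = decodeSignature(0x000106A5);
  return (S.Family == 6 && S.Model == 26) ? 0 : 1;
}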
