Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
 -rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 460
 1 file changed, 279 insertions(+), 181 deletions(-)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 69493bc..459f01a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -414,12 +414,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
   { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
   { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
-  { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
-  { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
-  { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
-  { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
-  { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
-  { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
   { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
   { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
   { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
@@ -680,6 +674,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::IMUL64rr, X86::IMUL64rm, 0 },
   { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
   { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+  { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+  { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+  { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+  { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+  { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+  { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
   { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
   { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
   { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
@@ -1130,8 +1130,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
   { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
   { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
-  { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, 0 },
-  { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, 0 },
+  { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 },
+  { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 },
   { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
   { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
@@ -1145,10 +1145,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
   { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
   { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
-  { X86::VFMADDPSr132r_Int, X86::VFMADDPSr132m_Int, TB_ALIGN_16 },
-  { X86::VFMADDPDr132r_Int, X86::VFMADDPDr132m_Int, TB_ALIGN_16 },
-  { X86::VFMADDPSr132rY_Int, X86::VFMADDPSr132mY_Int, TB_ALIGN_32 },
-  { X86::VFMADDPDr132rY_Int, X86::VFMADDPDr132mY_Int, TB_ALIGN_32 },
   { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
   { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
@@ -1156,8 +1152,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
   { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
   { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
-  { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, 0 },
-  { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, 0 },
+  { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 },
+  { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 },
   { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
   { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
@@ -1171,10 +1167,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
   { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
   { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
-  { X86::VFNMADDPSr132r_Int, X86::VFNMADDPSr132m_Int, TB_ALIGN_16 },
-  { X86::VFNMADDPDr132r_Int, X86::VFNMADDPDr132m_Int, TB_ALIGN_16 },
-  { X86::VFNMADDPSr132rY_Int, X86::VFNMADDPSr132mY_Int, TB_ALIGN_32 },
-  { X86::VFNMADDPDr132rY_Int, X86::VFNMADDPDr132mY_Int, TB_ALIGN_32 },
   { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
   { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
@@ -1182,8 +1174,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
   { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
   { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
-  { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, 0 },
-  { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, 0 },
+  { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 },
+  { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 },
   { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
   { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
@@ -1197,10 +1189,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
   { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
   { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
-  { X86::VFMSUBPSr132r_Int, X86::VFMSUBPSr132m_Int, TB_ALIGN_16 },
-  { X86::VFMSUBPDr132r_Int, X86::VFMSUBPDr132m_Int, TB_ALIGN_16 },
-  { X86::VFMSUBPSr132rY_Int, X86::VFMSUBPSr132mY_Int, TB_ALIGN_32 },
-  { X86::VFMSUBPDr132rY_Int, X86::VFMSUBPDr132mY_Int, TB_ALIGN_32 },
   { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
   { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
@@ -1208,8 +1196,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
   { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
   { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
-  { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, 0 },
-  { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, 0 },
+  { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 },
+  { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 },
   { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
   { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
@@ -1223,10 +1211,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
   { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
   { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
-  { X86::VFNMSUBPSr132r_Int, X86::VFNMSUBPSr132m_Int, TB_ALIGN_16 },
-  { X86::VFNMSUBPDr132r_Int, X86::VFNMSUBPDr132m_Int, TB_ALIGN_16 },
-  { X86::VFNMSUBPSr132rY_Int, X86::VFNMSUBPSr132mY_Int, TB_ALIGN_32 },
-  { X86::VFNMSUBPDr132rY_Int, X86::VFNMSUBPDr132mY_Int, TB_ALIGN_32 },
   { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
   { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
@@ -1240,10 +1224,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
   { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
   { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
-  { X86::VFMADDSUBPSr132r_Int, X86::VFMADDSUBPSr132m_Int, TB_ALIGN_16 },
-  { X86::VFMADDSUBPDr132r_Int, X86::VFMADDSUBPDr132m_Int, TB_ALIGN_16 },
-  { X86::VFMADDSUBPSr132rY_Int, X86::VFMADDSUBPSr132mY_Int, TB_ALIGN_32 },
-  { X86::VFMADDSUBPDr132rY_Int, X86::VFMADDSUBPDr132mY_Int, TB_ALIGN_32 },
   { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
   { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
@@ -1257,10 +1237,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
   { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
   { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
-  { X86::VFMSUBADDPSr132r_Int, X86::VFMSUBADDPSr132m_Int, TB_ALIGN_16 },
-  { X86::VFMSUBADDPDr132r_Int, X86::VFMSUBADDPDr132m_Int, TB_ALIGN_16 },
-  { X86::VFMSUBADDPSr132rY_Int, X86::VFMSUBADDPSr132mY_Int, TB_ALIGN_32 },
-  { X86::VFMSUBADDPDr132rY_Int, X86::VFMSUBADDPDr132mY_Int, TB_ALIGN_32 },
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1318,8 +1294,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
   SrcReg = MI.getOperand(1).getReg();
   DstReg = MI.getOperand(0).getReg();
   switch (MI.getOpcode()) {
-  default:
-    llvm_unreachable(0);
+  default: llvm_unreachable("Unreachable!");
   case X86::MOVSX16rr8:
   case X86::MOVZX16rr8:
   case X86::MOVSX32rr8:
@@ -1463,6 +1438,9 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
 /// regIsPICBase - Return true if register is PIC base (i.e.g defined by
 /// X86::MOVPC32r.
 static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+  // Don't waste compile time scanning use-def chains of physregs.
+  if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
+    return false;
   bool isPICBase = false;
   for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
        E = MRI.def_end(); I != E; ++I) {
@@ -1480,78 +1458,69 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                 AliasAnalysis *AA) const {
   switch (MI->getOpcode()) {
   default: break;
-    case X86::MOV8rm:
-    case X86::MOV16rm:
-    case X86::MOV32rm:
-    case X86::MOV64rm:
-    case X86::LD_Fp64m:
-    case X86::MOVSSrm:
-    case X86::MOVSDrm:
-    case X86::MOVAPSrm:
-    case X86::MOVUPSrm:
-    case X86::MOVAPDrm:
-    case X86::MOVDQArm:
-    case X86::VMOVSSrm:
-    case X86::VMOVSDrm:
-    case X86::VMOVAPSrm:
-    case X86::VMOVUPSrm:
-    case X86::VMOVAPDrm:
-    case X86::VMOVDQArm:
-    case X86::VMOVAPSYrm:
-    case X86::VMOVUPSYrm:
-    case X86::VMOVAPDYrm:
-    case X86::VMOVDQAYrm:
-    case X86::MMX_MOVD64rm:
-    case X86::MMX_MOVQ64rm:
-    case X86::FsVMOVAPSrm:
-    case X86::FsVMOVAPDrm:
-    case X86::FsMOVAPSrm:
-    case X86::FsMOVAPDrm: {
-      // Loads from constant pools are trivially rematerializable.
-      if (MI->getOperand(1).isReg() &&
-          MI->getOperand(2).isImm() &&
-          MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
-          MI->isInvariantLoad(AA)) {
-        unsigned BaseReg = MI->getOperand(1).getReg();
-        if (BaseReg == 0 || BaseReg == X86::RIP)
-          return true;
-        // Allow re-materialization of PIC load.
-        if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
-          return false;
-        const MachineFunction &MF = *MI->getParent()->getParent();
-        const MachineRegisterInfo &MRI = MF.getRegInfo();
-        bool isPICBase = false;
-        for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
-             E = MRI.def_end(); I != E; ++I) {
-          MachineInstr *DefMI = I.getOperand().getParent();
-          if (DefMI->getOpcode() != X86::MOVPC32r)
-            return false;
-          assert(!isPICBase && "More than one PIC base?");
-          isPICBase = true;
-        }
-        return isPICBase;
-      }
-      return false;
+  case X86::MOV8rm:
+  case X86::MOV16rm:
+  case X86::MOV32rm:
+  case X86::MOV64rm:
+  case X86::LD_Fp64m:
+  case X86::MOVSSrm:
+  case X86::MOVSDrm:
+  case X86::MOVAPSrm:
+  case X86::MOVUPSrm:
+  case X86::MOVAPDrm:
+  case X86::MOVDQArm:
+  case X86::VMOVSSrm:
+  case X86::VMOVSDrm:
+  case X86::VMOVAPSrm:
+  case X86::VMOVUPSrm:
+  case X86::VMOVAPDrm:
+  case X86::VMOVDQArm:
+  case X86::VMOVAPSYrm:
+  case X86::VMOVUPSYrm:
+  case X86::VMOVAPDYrm:
+  case X86::VMOVDQAYrm:
+  case X86::MMX_MOVD64rm:
+  case X86::MMX_MOVQ64rm:
+  case X86::FsVMOVAPSrm:
+  case X86::FsVMOVAPDrm:
+  case X86::FsMOVAPSrm:
+  case X86::FsMOVAPDrm: {
+    // Loads from constant pools are trivially rematerializable.
+    if (MI->getOperand(1).isReg() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+        MI->isInvariantLoad(AA)) {
+      unsigned BaseReg = MI->getOperand(1).getReg();
+      if (BaseReg == 0 || BaseReg == X86::RIP)
+        return true;
+      // Allow re-materialization of PIC load.
+      if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+        return false;
+      const MachineFunction &MF = *MI->getParent()->getParent();
+      const MachineRegisterInfo &MRI = MF.getRegInfo();
+      return regIsPICBase(BaseReg, MRI);
     }
+    return false;
+  }
-    case X86::LEA32r:
-    case X86::LEA64r: {
-      if (MI->getOperand(2).isImm() &&
-          MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
-          !MI->getOperand(4).isReg()) {
-        // lea fi#, lea GV, etc. are all rematerializable.
-        if (!MI->getOperand(1).isReg())
-          return true;
-        unsigned BaseReg = MI->getOperand(1).getReg();
-        if (BaseReg == 0)
-          return true;
-        // Allow re-materialization of lea PICBase + x.
-        const MachineFunction &MF = *MI->getParent()->getParent();
-        const MachineRegisterInfo &MRI = MF.getRegInfo();
-        return regIsPICBase(BaseReg, MRI);
-      }
-      return false;
-    }
+  case X86::LEA32r:
+  case X86::LEA64r: {
+    if (MI->getOperand(2).isImm() &&
+        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+        !MI->getOperand(4).isReg()) {
+      // lea fi#, lea GV, etc. are all rematerializable.
+      if (!MI->getOperand(1).isReg())
+        return true;
+      unsigned BaseReg = MI->getOperand(1).getReg();
+      if (BaseReg == 0)
+        return true;
+      // Allow re-materialization of lea PICBase + x.
+      const MachineFunction &MF = *MI->getParent()->getParent();
+      const MachineRegisterInfo &MRI = MF.getRegInfo();
+      return regIsPICBase(BaseReg, MRI);
+    }
+    return false;
+  }
   }

   // All other instructions marked M_REMATERIALIZABLE are always trivially
@@ -1660,7 +1629,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
   case X86::MOV64r0: {
     if (!isSafeToClobberEFLAGS(MBB, I)) {
       switch (Opc) {
-      default: break;
+      default: llvm_unreachable("Unreachable!");
       case X86::MOV8r0: Opc = X86::MOV8ri; break;
       case X86::MOV16r0: Opc = X86::MOV16ri; break;
       case X86::MOV32r0: Opc = X86::MOV32ri; break;
@@ -1733,8 +1702,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
                                     get(Opc), leaOutReg);
   switch (MIOpc) {
-  default:
-    llvm_unreachable(0);
+  default: llvm_unreachable("Unreachable!");
   case X86::SHL16ri: {
     unsigned ShAmt = MI->getOperand(2).getImm();
     MIB.addReg(0).addImm(1 << ShAmt)
@@ -2126,57 +2094,25 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
     MI->getOperand(3).setImm(Size-Amt);
     return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
   }
-  case X86::CMOVB16rr:
-  case X86::CMOVB32rr:
-  case X86::CMOVB64rr:
-  case X86::CMOVAE16rr:
-  case X86::CMOVAE32rr:
-  case X86::CMOVAE64rr:
-  case X86::CMOVE16rr:
-  case X86::CMOVE32rr:
-  case X86::CMOVE64rr:
-  case X86::CMOVNE16rr:
-  case X86::CMOVNE32rr:
-  case X86::CMOVNE64rr:
-  case X86::CMOVBE16rr:
-  case X86::CMOVBE32rr:
-  case X86::CMOVBE64rr:
-  case X86::CMOVA16rr:
-  case X86::CMOVA32rr:
-  case X86::CMOVA64rr:
-  case X86::CMOVL16rr:
-  case X86::CMOVL32rr:
-  case X86::CMOVL64rr:
-  case X86::CMOVGE16rr:
-  case X86::CMOVGE32rr:
-  case X86::CMOVGE64rr:
-  case X86::CMOVLE16rr:
-  case X86::CMOVLE32rr:
-  case X86::CMOVLE64rr:
-  case X86::CMOVG16rr:
-  case X86::CMOVG32rr:
-  case X86::CMOVG64rr:
-  case X86::CMOVS16rr:
-  case X86::CMOVS32rr:
-  case X86::CMOVS64rr:
-  case X86::CMOVNS16rr:
-  case X86::CMOVNS32rr:
-  case X86::CMOVNS64rr:
-  case X86::CMOVP16rr:
-  case X86::CMOVP32rr:
-  case X86::CMOVP64rr:
-  case X86::CMOVNP16rr:
-  case X86::CMOVNP32rr:
-  case X86::CMOVNP64rr:
-  case X86::CMOVO16rr:
-  case X86::CMOVO32rr:
-  case X86::CMOVO64rr:
-  case X86::CMOVNO16rr:
-  case X86::CMOVNO32rr:
-  case X86::CMOVNO64rr: {
-    unsigned Opc = 0;
+  case X86::CMOVB16rr:  case X86::CMOVB32rr:  case X86::CMOVB64rr:
+  case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
+  case X86::CMOVE16rr:  case X86::CMOVE32rr:  case X86::CMOVE64rr:
+  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
+  case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
+  case X86::CMOVA16rr:  case X86::CMOVA32rr:  case X86::CMOVA64rr:
+  case X86::CMOVL16rr:  case X86::CMOVL32rr:  case X86::CMOVL64rr:
+  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
+  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
+  case X86::CMOVG16rr:  case X86::CMOVG32rr:  case X86::CMOVG64rr:
+  case X86::CMOVS16rr:  case X86::CMOVS32rr:  case X86::CMOVS64rr:
+  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
+  case X86::CMOVP16rr:  case X86::CMOVP32rr:  case X86::CMOVP64rr:
+  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
+  case X86::CMOVO16rr:  case X86::CMOVO32rr:  case X86::CMOVO64rr:
+  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
+    unsigned Opc;
     switch (MI->getOpcode()) {
-    default: break;
+    default: llvm_unreachable("Unreachable!");
     case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
    case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
    case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
@@ -2408,7 +2344,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) {
 /// whether it has memory operand.
 static unsigned getSETFromCond(X86::CondCode CC,
                                bool HasMemoryOperand) {
-  static const unsigned Opc[16][2] = {
+  static const uint16_t Opc[16][2] = {
     { X86::SETAr, X86::SETAm },
     { X86::SETAEr, X86::SETAEm },
     { X86::SETBr, X86::SETBm },
@@ -2435,7 +2371,7 @@
 /// register size in bytes, and operand type.
 static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
                                 bool HasMemoryOperand) {
-  static const unsigned Opc[32][3] = {
+  static const uint16_t Opc[32][3] = {
     { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
     { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
     { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
@@ -2768,19 +2704,18 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
   // SrcReg(GR64) -> DestReg(VR64)
   if (X86::GR64RegClass.contains(DestReg)) {
-    if (X86::VR128RegClass.contains(SrcReg)) {
+    if (X86::VR128RegClass.contains(SrcReg))
       // Copy from a VR128 register to a GR64 register.
       return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
-    } else if (X86::VR64RegClass.contains(SrcReg)) {
+    if (X86::VR64RegClass.contains(SrcReg))
       // Copy from a VR64 register to a GR64 register.
       return X86::MOVSDto64rr;
-    }
   } else if (X86::GR64RegClass.contains(SrcReg)) {
     // Copy from a GR64 register to a VR128 register.
     if (X86::VR128RegClass.contains(DestReg))
       return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
     // Copy from a GR64 register to a VR64 register.
-    else if (X86::VR64RegClass.contains(DestReg))
+    if (X86::VR64RegClass.contains(DestReg))
       return X86::MOV64toSDrr;
   }
@@ -2788,12 +2723,12 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
   // SrcReg(GR32) -> DestReg(FR32)
   if (X86::GR32RegClass.contains(DestReg) &&
       X86::FR32RegClass.contains(SrcReg))
-      // Copy from a FR32 register to a GR32 register.
-      return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+    // Copy from a FR32 register to a GR32 register.
+    return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;

   if (X86::FR32RegClass.contains(DestReg) &&
       X86::GR32RegClass.contains(SrcReg))
-      // Copy from a GR32 register to a FR32 register.
-      return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+    // Copy from a GR32 register to a FR32 register.
+    return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;

   return 0;
 }
@@ -2804,7 +2739,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                bool KillSrc) const {
   // First deal with the normal symmetric copies.
   bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
-  unsigned Opc = 0;
+  unsigned Opc;
   if (X86::GR64RegClass.contains(DestReg, SrcReg))
     Opc = X86::MOV64rr;
   else if (X86::GR32RegClass.contains(DestReg, SrcReg))
@@ -2843,7 +2778,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       BuildMI(MBB, MI, DL, get(X86::PUSHF64));
       BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
       return;
-    } else if (X86::GR32RegClass.contains(DestReg)) {
+    }
+    if (X86::GR32RegClass.contains(DestReg)) {
       BuildMI(MBB, MI, DL, get(X86::PUSHF32));
       BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
       return;
@@ -2855,7 +2791,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
         .addReg(SrcReg, getKillRegState(KillSrc));
       BuildMI(MBB, MI, DL, get(X86::POPF64));
       return;
-    } else if (X86::GR32RegClass.contains(SrcReg)) {
+    }
+    if (X86::GR32RegClass.contains(SrcReg)) {
       BuildMI(MBB, MI, DL, get(X86::PUSH32r))
         .addReg(SrcReg, getKillRegState(KillSrc));
       BuildMI(MBB, MI, DL, get(X86::POPF32));
@@ -3037,6 +2974,37 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
     CmpMask = ~0;
     CmpValue = MI->getOperand(1).getImm();
     return true;
+  // A SUB can be used to perform comparison.
+  case X86::SUB64rm:
+  case X86::SUB32rm:
+  case X86::SUB16rm:
+  case X86::SUB8rm:
+    SrcReg = MI->getOperand(1).getReg();
+    SrcReg2 = 0;
+    CmpMask = ~0;
+    CmpValue = 0;
+    return true;
+  case X86::SUB64rr:
+  case X86::SUB32rr:
+  case X86::SUB16rr:
+  case X86::SUB8rr:
+    SrcReg = MI->getOperand(1).getReg();
+    SrcReg2 = MI->getOperand(2).getReg();
+    CmpMask = ~0;
+    CmpValue = 0;
+    return true;
+  case X86::SUB64ri32:
+  case X86::SUB64ri8:
+  case X86::SUB32ri:
+  case X86::SUB32ri8:
+  case X86::SUB16ri:
+  case X86::SUB16ri8:
+  case X86::SUB8ri:
+    SrcReg = MI->getOperand(1).getReg();
+    SrcReg2 = 0;
+    CmpMask = ~0;
+    CmpValue = MI->getOperand(2).getImm();
+    return true;
   case X86::CMP64rr:
   case X86::CMP32rr:
   case X86::CMP16rr:
@@ -3145,6 +3113,55 @@ bool X86InstrInfo::
 optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
                      int CmpMask, int CmpValue,
                      const MachineRegisterInfo *MRI) const {
+  // Check whether we can replace SUB with CMP.
+  unsigned NewOpcode = 0;
+  switch (CmpInstr->getOpcode()) {
+  default: break;
+  case X86::SUB64ri32:
+  case X86::SUB64ri8:
+  case X86::SUB32ri:
+  case X86::SUB32ri8:
+  case X86::SUB16ri:
+  case X86::SUB16ri8:
+  case X86::SUB8ri:
+  case X86::SUB64rm:
+  case X86::SUB32rm:
+  case X86::SUB16rm:
+  case X86::SUB8rm:
+  case X86::SUB64rr:
+  case X86::SUB32rr:
+  case X86::SUB16rr:
+  case X86::SUB8rr: {
+    if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
+      return false;
+    // There is no use of the destination register, we can replace SUB with CMP.
+    switch (CmpInstr->getOpcode()) {
+    default: llvm_unreachable("Unreachable!");
+    case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
+    case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
+    case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
+    case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
+    case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
+    case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
+    case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
+    case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
+    case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
+    case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
+    case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
+    case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
+    case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
+    case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
+    case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
+    }
+    CmpInstr->setDesc(get(NewOpcode));
+    CmpInstr->RemoveOperand(0);
+    // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
+    if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
+        NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
+      return false;
+  }
+  }
+
   // Get the unique definition of SrcReg.
   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
   if (!MI) return false;
@@ -3221,12 +3238,15 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
   for (++I; I != E; ++I) {
     const MachineInstr &Instr = *I;
-    if (Instr.modifiesRegister(X86::EFLAGS, TRI)) {
+    bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
+    bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
+    // We should check the usage if this instruction uses and updates EFLAGS.
+    if (!UseEFLAGS && ModifyEFLAGS) {
       // It is safe to remove CmpInstr if EFLAGS is updated again.
       IsSafe = true;
       break;
     }
-    if (!Instr.readsRegister(X86::EFLAGS, TRI))
+    if (!UseEFLAGS && !ModifyEFLAGS)
       continue;

     // EFLAGS is used by this instruction.
@@ -3281,7 +3301,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
       // instructions will be modified.
       OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
     }
-    if (Instr.killsRegister(X86::EFLAGS, TRI)) {
+    if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
+      // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
       IsSafe = true;
       break;
     }
@@ -3319,6 +3340,81 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   return true;
 }

+/// optimizeLoadInstr - Try to remove the load by folding it to a register
+/// operand at the use. We fold the load instructions if load defines a virtual
+/// register, the virtual register is used once in the same BB, and the
+/// instructions in-between do not load or store, and have no side effects.
+MachineInstr* X86InstrInfo::
+optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
+                  unsigned &FoldAsLoadDefReg,
+                  MachineInstr *&DefMI) const {
+  if (FoldAsLoadDefReg == 0)
+    return 0;
+  // To be conservative, if there exists another load, clear the load candidate.
+  if (MI->mayLoad()) {
+    FoldAsLoadDefReg = 0;
+    return 0;
+  }
+
+  // Check whether we can move DefMI here.
+  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
+  assert(DefMI);
+  bool SawStore = false;
+  if (!DefMI->isSafeToMove(this, 0, SawStore))
+    return 0;
+
+  // We try to commute MI if possible.
+  unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
+  for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
+    // Collect information about virtual register operands of MI.
+    unsigned SrcOperandId = 0;
+    bool FoundSrcOperand = false;
+    for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (Reg != FoldAsLoadDefReg)
+        continue;
+      // Do not fold if we have a subreg use or a def or multiple uses.
+      if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
+        return 0;
+
+      SrcOperandId = i;
+      FoundSrcOperand = true;
+    }
+    if (!FoundSrcOperand) return 0;
+
+    // Check whether we can fold the def into SrcOperandId.
+    SmallVector<unsigned, 8> Ops;
+    Ops.push_back(SrcOperandId);
+    MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+    if (FoldMI) {
+      FoldAsLoadDefReg = 0;
+      return FoldMI;
+    }
+
+    if (Idx == 1) {
+      // MI was changed but it didn't help, commute it back!
+      commuteInstruction(MI, false);
+      return 0;
+    }
+
+    // Check whether we can commute MI and enable folding.
+    if (MI->isCommutable()) {
+      MachineInstr *NewMI = commuteInstruction(MI, false);
+      // Unable to commute.
+      if (!NewMI) return 0;
+      if (NewMI != MI) {
+        // New instruction. It doesn't need to be kept.
+        NewMI->eraseFromParent();
+        return 0;
+      }
+    }
+  }
+  return 0;
+}
+
 /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
 /// instruction with two undef reads of the register being defined. This is
 /// used for mapping:
@@ -3477,6 +3573,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     OpcodeTablePtr = &RegOp2MemOpTable1;
   } else if (i == 2) {
     OpcodeTablePtr = &RegOp2MemOpTable2;
+  } else if (i == 3) {
+    OpcodeTablePtr = &RegOp2MemOpTable3;
   }

   // If table selected...
@@ -3947,7 +4045,6 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                getUndefRegState(MO.isUndef()));
   }
   // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
-  unsigned NewOpc = 0;
   switch (DataMI->getOpcode()) {
   default: break;
   case X86::CMP64ri32:
@@ -3960,8 +4057,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
     MachineOperand &MO0 = DataMI->getOperand(0);
     MachineOperand &MO1 = DataMI->getOperand(1);
     if (MO1.getImm() == 0) {
+      unsigned NewOpc;
      switch (DataMI->getOpcode()) {
-      default: break;
+      default: llvm_unreachable("Unreachable!");
      case X86::CMP64ri8:
      case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
      case X86::CMP32ri8:
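For context, the largest functional change above teaches optimizeCompareInstr to rewrite a SUB whose destination register has no remaining non-debug uses into the matching CMP: both set EFLAGS identically, and only the flags are needed. A minimal standalone C++ sketch of that idea follows; the enum values and helper name are hypothetical illustrations, not LLVM's API.

    // Standalone illustration of the SUB -> CMP rewrite: when the arithmetic
    // result is dead, only EFLAGS matters, so a flag-equivalent CMP suffices.
    #include <cstdio>

    enum Opcode { SUB32rr, SUB32ri, CMP32rr, CMP32ri, OTHER };  // hypothetical opcodes

    // Hypothetical helper mirroring the new opcode-mapping switch: it is only
    // valid to rewrite when the SUB's destination register is otherwise unused.
    static Opcode subToCmp(Opcode Op, bool DestHasUses) {
      if (DestHasUses)
        return OTHER;             // the arithmetic result is still needed, keep the SUB
      switch (Op) {
      case SUB32rr: return CMP32rr;
      case SUB32ri: return CMP32ri;
      default:      return OTHER; // not a rewritable SUB
      }
    }

    int main() {
      // A dead "sub %esi, %edi" feeding only a branch can become "cmp %esi, %edi".
      std::printf("%s\n", subToCmp(SUB32rr, false) == CMP32rr ? "rewrite to CMP" : "keep SUB");
      return 0;
    }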
