From 43dbe05279b753aabda571d9c83eaeb36987001a Mon Sep 17 00:00:00 2001
From: Owen Anderson <resistor@mac.com>
Date: Mon, 7 Jan 2008 01:35:02 +0000
Subject: Move even more functionality from MRegisterInfo into TargetInstrInfo.

Some day I'll get it all moved over...

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45672 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/MRegisterInfo.h    |   52 --
 include/llvm/Target/TargetInstrInfo.h  |   54 ++
 lib/Target/ARM/ARMInstrInfo.cpp        |  113 ++++
 lib/Target/ARM/ARMInstrInfo.h          |   14 +
 lib/Target/ARM/ARMRegisterInfo.cpp     |  196 ++----
 lib/Target/ARM/ARMRegisterInfo.h       |   15 +-
 lib/Target/Alpha/AlphaInstrInfo.cpp    |   37 ++
 lib/Target/Alpha/AlphaInstrInfo.h      |   11 +
 lib/Target/Alpha/AlphaRegisterInfo.cpp |   37 --
 lib/Target/Alpha/AlphaRegisterInfo.h   |   10 -
 lib/Target/CellSPU/SPUInstrInfo.cpp    |   39 ++
 lib/Target/CellSPU/SPUInstrInfo.h      |   14 +-
 lib/Target/CellSPU/SPURegisterInfo.cpp |   48 --
 lib/Target/CellSPU/SPURegisterInfo.h   |   10 -
 lib/Target/Mips/MipsInstrInfo.cpp      |   31 +
 lib/Target/Mips/MipsInstrInfo.h        |   11 +
 lib/Target/Mips/MipsRegisterInfo.cpp   |   31 -
 lib/Target/Mips/MipsRegisterInfo.h     |   10 -
 lib/Target/PowerPC/PPCInstrInfo.cpp    |   79 +++
 lib/Target/PowerPC/PPCInstrInfo.h      |   15 +
 lib/Target/PowerPC/PPCRegisterInfo.cpp |   79 ---
 lib/Target/PowerPC/PPCRegisterInfo.h   |   15 -
 lib/Target/Sparc/SparcInstrInfo.cpp    |   38 ++
 lib/Target/Sparc/SparcInstrInfo.h      |   10 +
 lib/Target/Sparc/SparcRegisterInfo.cpp |   38 --
 lib/Target/Sparc/SparcRegisterInfo.h   |   10 -
 lib/Target/X86/X86InstrInfo.cpp        | 1031 ++++++++++++++++++++++++++++++
 lib/Target/X86/X86InstrInfo.h          |   56 ++
 lib/Target/X86/X86RegisterInfo.cpp     | 1070 --------------------------------
 lib/Target/X86/X86RegisterInfo.h       |   55 --
 30 files changed, 1599 insertions(+), 1630 deletions(-)

diff --git a/include/llvm/Target/MRegisterInfo.h b/include/llvm/Target/MRegisterInfo.h
index 1faa0b5..671ce0a 100644
--- a/include/llvm/Target/MRegisterInfo.h
+++ b/include/llvm/Target/MRegisterInfo.h
@@ -484,58 +484,6 @@ public:
                          unsigned DestReg, const MachineInstr *Orig) const = 0;
 
-  /// foldMemoryOperand - Attempt to fold a load or store of the specified stack
-  /// slot into the specified machine instruction for the specified operand(s).
-  /// If this is possible, a new instruction is returned with the specified
-  /// operand folded, otherwise NULL is returned. The client is responsible for
-  /// removing the old instruction and adding the new one in the instruction
-  /// stream.
-  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                          SmallVectorImpl<unsigned> &Ops,
-                                          int FrameIndex) const {
-    return 0;
-  }
-
-  /// foldMemoryOperand - Same as the previous version except it allows folding
-  /// of any load and store from / to any address, not just from a specific
-  /// stack slot.
-  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                          SmallVectorImpl<unsigned> &Ops,
-                                          MachineInstr* LoadMI) const {
-    return 0;
-  }
-
-  /// canFoldMemoryOperand - Returns true if the specified load / store is
-  /// folding is possible.
-  virtual
-  bool canFoldMemoryOperand(MachineInstr *MI,
-                            SmallVectorImpl<unsigned> &Ops) const{
-    return false;
-  }
-
-  /// unfoldMemoryOperand - Separate a single instruction which folded a load or
-  /// a store or a load and a store into two or more instruction. If this is
-  /// possible, returns true as well as the new instructions by reference.
-  virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-                                   unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
-                                   SmallVectorImpl<MachineInstr*> &NewMIs) const{
-    return false;
-  }
-
-  virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-                                   SmallVectorImpl<SDNode*> &NewNodes) const {
-    return false;
-  }
-
-  /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new
-  /// instruction after load / store are unfolded from an instruction of the
-  /// specified opcode. It returns zero if the specified unfolding is not
-  /// possible.
-  virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
-                                              bool UnfoldLoad, bool UnfoldStore) const {
-    return 0;
-  }
-
   /// targetHandlesStackFrameRounding - Returns true if the target is
   /// responsible for rounding up the stack frame (probably at emitPrologue
   /// time).
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index f2a091b..f7e7b3c 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -27,6 +27,8 @@ class TargetMachine;
 class TargetRegisterClass;
 class LiveVariables;
 class CalleeSavedInfo;
+class SDNode;
+class SelectionDAG;
 
 template<class T> class SmallVectorImpl;
 
@@ -540,6 +542,58 @@ public:
     return false;
   }
 
+  /// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+  /// slot into the specified machine instruction for the specified operand(s).
+  /// If this is possible, a new instruction is returned with the specified
+  /// operand folded, otherwise NULL is returned. The client is responsible for
+  /// removing the old instruction and adding the new one in the instruction
+  /// stream.
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          int FrameIndex) const {
+    return 0;
+  }
+
+  /// foldMemoryOperand - Same as the previous version except it allows folding
+  /// of any load and store from / to any address, not just from a specific
+  /// stack slot.
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          MachineInstr* LoadMI) const {
+    return 0;
+  }
+
+  /// canFoldMemoryOperand - Returns true if folding of the specified load /
+  /// store is possible.
+  virtual
+  bool canFoldMemoryOperand(MachineInstr *MI,
+                            SmallVectorImpl<unsigned> &Ops) const{
+    return false;
+  }
+
+  /// unfoldMemoryOperand - Separate a single instruction which folded a load or
+  /// a store or a load and a store into two or more instructions. If this is
+  /// possible, returns true as well as the new instructions by reference.
+  virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+                                   unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+                                   SmallVectorImpl<MachineInstr*> &NewMIs) const{
+    return false;
+  }
+
+  virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+                                   SmallVectorImpl<SDNode*> &NewNodes) const {
+    return false;
+  }
+
+  /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new
+  /// instruction after load / store are unfolded from an instruction of the
+  /// specified opcode. It returns zero if the specified unfolding is not
+  /// possible.
+  virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+                                              bool UnfoldLoad, bool UnfoldStore) const {
+    return 0;
+  }
+
   /// BlockHasNoFallThrough - Return true if the specified block does not
   /// fall-through into its successor block. This is primarily used when a
   /// branch is unanalyzable. It is useful for things like unconditional
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index aa0109f..f40be58 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -643,6 +643,119 @@ bool ARMInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   return true;
 }
 
+MachineInstr *ARMInstrInfo::foldMemoryOperand(MachineInstr *MI,
+                                              SmallVectorImpl<unsigned> &Ops,
+                                              int FI) const {
+  if (Ops.size() != 1) return NULL;
+
+  unsigned OpNum = Ops[0];
+  unsigned Opc = MI->getOpcode();
+  MachineInstr *NewMI = NULL;
+  switch (Opc) {
+  default: break;
+  case ARM::MOVr: {
+    if (MI->getOperand(4).getReg() == ARM::CPSR)
+      // If it is updating CPSR, then it cannot be folded.
+      break;
+    unsigned Pred = MI->getOperand(2).getImm();
+    unsigned PredReg = MI->getOperand(3).getReg();
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      NewMI = BuildMI(get(ARM::STR)).addReg(SrcReg).addFrameIndex(FI)
+        .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      NewMI = BuildMI(get(ARM::LDR), DstReg).addFrameIndex(FI).addReg(0)
+        .addImm(0).addImm(Pred).addReg(PredReg);
+    }
+    break;
+  }
+  case ARM::tMOVr: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
+        // tSpill cannot take a high register operand.
+        break;
+      NewMI = BuildMI(get(ARM::tSpill)).addReg(SrcReg).addFrameIndex(FI)
+        .addImm(0);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
+        // tRestore cannot target a high register operand.
+        break;
+      NewMI = BuildMI(get(ARM::tRestore), DstReg).addFrameIndex(FI)
+        .addImm(0);
+    }
+    break;
+  }
+  case ARM::FCPYS: {
+    unsigned Pred = MI->getOperand(2).getImm();
+    unsigned PredReg = MI->getOperand(3).getReg();
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      NewMI = BuildMI(get(ARM::FSTS)).addReg(SrcReg).addFrameIndex(FI)
+        .addImm(0).addImm(Pred).addReg(PredReg);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      NewMI = BuildMI(get(ARM::FLDS), DstReg).addFrameIndex(FI)
+        .addImm(0).addImm(Pred).addReg(PredReg);
+    }
+    break;
+  }
+  case ARM::FCPYD: {
+    unsigned Pred = MI->getOperand(2).getImm();
+    unsigned PredReg = MI->getOperand(3).getReg();
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      NewMI = BuildMI(get(ARM::FSTD)).addReg(SrcReg).addFrameIndex(FI)
+        .addImm(0).addImm(Pred).addReg(PredReg);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      NewMI = BuildMI(get(ARM::FLDD), DstReg).addFrameIndex(FI)
+        .addImm(0).addImm(Pred).addReg(PredReg);
+    }
+    break;
+  }
+  }
+
+  if (NewMI)
+    NewMI->copyKillDeadInfo(MI);
+  return NewMI;
+}
+
+bool ARMInstrInfo::canFoldMemoryOperand(MachineInstr *MI,
+                                        SmallVectorImpl<unsigned> &Ops) const {
+  if (Ops.size() != 1) return false;
+
+  unsigned OpNum = Ops[0];
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+  default: break;
+  case ARM::MOVr:
+    // If it is updating CPSR, then it cannot be folded.
+    return MI->getOperand(4).getReg() != ARM::CPSR;
+  case ARM::tMOVr: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
+        // tSpill cannot take a high register operand.
+        return false;
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
+        // tRestore cannot target a high register operand.
+        return false;
+    }
+    return true;
+  }
+  case ARM::FCPYS:
+  case ARM::FCPYD:
+    return true;
+  }
+
+  return false;
+}
+
 bool ARMInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
   if (MBB.empty()) return false;
 
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index a1cd821..60d9640 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -190,6 +190,20 @@ public:
   virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
                                 const std::vector<CalleeSavedInfo> &CSI) const;
+
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          int FrameIndex) const;
+
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          MachineInstr* LoadMI) const {
+    return 0;
+  }
+
+  virtual bool canFoldMemoryOperand(MachineInstr *MI,
+                                    SmallVectorImpl<unsigned> &Ops) const;
+
   virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
   virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
 
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 3160262..82aa74e 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -136,7 +136,7 @@ void ARMRegisterInfo::reMaterialize(MachineBasicBlock &MBB,
 
 /// isLowRegister - Returns true if the register is low register r0-r7.
 ///
-static bool isLowRegister(unsigned Reg) {
+bool ARMRegisterInfo::isLowRegister(unsigned Reg) const {
   using namespace ARM;
   switch (Reg) {
   case R0:  case R1:  case R2:  case R3:
@@ -147,119 +147,6 @@ static bool isLowRegister(unsigned Reg) {
   }
 }
 
-MachineInstr *ARMRegisterInfo::foldMemoryOperand(MachineInstr *MI,
-                                                 SmallVectorImpl<unsigned> &Ops,
-                                                 int FI) const {
-  if (Ops.size() != 1) return NULL;
-
-  unsigned OpNum = Ops[0];
-  unsigned Opc = MI->getOpcode();
-  MachineInstr *NewMI = NULL;
-  switch (Opc) {
-  default: break;
-  case ARM::MOVr: {
-    if (MI->getOperand(4).getReg() == ARM::CPSR)
-      // If it is updating CPSR, then it cannot be foled.
-      break;
-    unsigned Pred = MI->getOperand(2).getImm();
-    unsigned PredReg = MI->getOperand(3).getReg();
-    if (OpNum == 0) { // move -> store
-      unsigned SrcReg = MI->getOperand(1).getReg();
-      NewMI = BuildMI(TII.get(ARM::STR)).addReg(SrcReg).addFrameIndex(FI)
-        .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
-    } else {          // move -> load
-      unsigned DstReg = MI->getOperand(0).getReg();
-      NewMI = BuildMI(TII.get(ARM::LDR), DstReg).addFrameIndex(FI).addReg(0)
-        .addImm(0).addImm(Pred).addReg(PredReg);
-    }
-    break;
-  }
-  case ARM::tMOVr: {
-    if (OpNum == 0) { // move -> store
-      unsigned SrcReg = MI->getOperand(1).getReg();
-      if (isPhysicalRegister(SrcReg) && !isLowRegister(SrcReg))
-        // tSpill cannot take a high register operand.
-        break;
-      NewMI = BuildMI(TII.get(ARM::tSpill)).addReg(SrcReg).addFrameIndex(FI)
-        .addImm(0);
-    } else {          // move -> load
-      unsigned DstReg = MI->getOperand(0).getReg();
-      if (isPhysicalRegister(DstReg) && !isLowRegister(DstReg))
-        // tRestore cannot target a high register operand.
-        break;
-      NewMI = BuildMI(TII.get(ARM::tRestore), DstReg).addFrameIndex(FI)
-        .addImm(0);
-    }
-    break;
-  }
-  case ARM::FCPYS: {
-    unsigned Pred = MI->getOperand(2).getImm();
-    unsigned PredReg = MI->getOperand(3).getReg();
-    if (OpNum == 0) { // move -> store
-      unsigned SrcReg = MI->getOperand(1).getReg();
-      NewMI = BuildMI(TII.get(ARM::FSTS)).addReg(SrcReg).addFrameIndex(FI)
-        .addImm(0).addImm(Pred).addReg(PredReg);
-    } else {          // move -> load
-      unsigned DstReg = MI->getOperand(0).getReg();
-      NewMI = BuildMI(TII.get(ARM::FLDS), DstReg).addFrameIndex(FI)
-        .addImm(0).addImm(Pred).addReg(PredReg);
-    }
-    break;
-  }
-  case ARM::FCPYD: {
-    unsigned Pred = MI->getOperand(2).getImm();
-    unsigned PredReg = MI->getOperand(3).getReg();
-    if (OpNum == 0) { // move -> store
-      unsigned SrcReg = MI->getOperand(1).getReg();
-      NewMI = BuildMI(TII.get(ARM::FSTD)).addReg(SrcReg).addFrameIndex(FI)
-        .addImm(0).addImm(Pred).addReg(PredReg);
-    } else {          // move -> load
-      unsigned DstReg = MI->getOperand(0).getReg();
-      NewMI = BuildMI(TII.get(ARM::FLDD), DstReg).addFrameIndex(FI)
-        .addImm(0).addImm(Pred).addReg(PredReg);
-    }
-    break;
-  }
-  }
-
-  if (NewMI)
-    NewMI->copyKillDeadInfo(MI);
-  return NewMI;
-}
-
-bool ARMRegisterInfo::canFoldMemoryOperand(MachineInstr *MI,
-                                           SmallVectorImpl<unsigned> &Ops) const {
-  if (Ops.size() != 1) return false;
-
-  unsigned OpNum = Ops[0];
-  unsigned Opc = MI->getOpcode();
-  switch (Opc) {
-  default: break;
-  case ARM::MOVr:
-    // If it is updating CPSR, then it cannot be foled.
-    return MI->getOperand(4).getReg() != ARM::CPSR;
-  case ARM::tMOVr: {
-    if (OpNum == 0) { // move -> store
-      unsigned SrcReg = MI->getOperand(1).getReg();
-      if (isPhysicalRegister(SrcReg) && !isLowRegister(SrcReg))
-        // tSpill cannot take a high register operand.
-        return false;
-    } else {          // move -> load
-      unsigned DstReg = MI->getOperand(0).getReg();
-      if (isPhysicalRegister(DstReg) && !isLowRegister(DstReg))
-        // tRestore cannot target a high register operand.
-        return false;
-    }
-    return true;
-  }
-  case ARM::FCPYS:
-  case ARM::FCPYD:
-    return true;
-  }
-
-  return false;
-}
-
 const unsigned*
 ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   static const unsigned CalleeSavedRegs[] = {
@@ -426,12 +313,13 @@ static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
 /// constpool entry.
 static void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator &MBBI,
-                                     unsigned DestReg, unsigned BaseReg,
-                                     int NumBytes, bool CanChangeCC,
-                                     const TargetInstrInfo &TII) {
-  bool isHigh = !isLowRegister(DestReg) ||
-                (BaseReg != 0 && !isLowRegister(BaseReg));
+                                     MachineBasicBlock::iterator &MBBI,
+                                     unsigned DestReg, unsigned BaseReg,
+                                     int NumBytes, bool CanChangeCC,
+                                     const TargetInstrInfo &TII,
+                                     const ARMRegisterInfo& MRI) {
+  bool isHigh = !MRI.isLowRegister(DestReg) ||
+                (BaseReg != 0 && !MRI.isLowRegister(BaseReg));
   bool isSub = false;
   // Subtract doesn't have high register version. Load the negative value
   // if either base or dest register is a high register. Also, if do not
@@ -476,7 +364,8 @@ static void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       unsigned DestReg, unsigned BaseReg,
-                                      int NumBytes, const TargetInstrInfo &TII) {
+                                      int NumBytes, const TargetInstrInfo &TII,
+                                      const ARMRegisterInfo& MRI) {
   bool isSub = NumBytes < 0;
   unsigned Bytes = (unsigned)NumBytes;
   if (isSub) Bytes = -NumBytes;
@@ -522,12 +411,12 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
   if (NumMIs > Threshold) {
     // This will expand into too many instructions. Load the immediate from a
     // constpool entry.
-    emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII);
+    emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, MRI);
     return;
   }
 
   if (DstNotEqBase) {
-    if (isLowRegister(DestReg) && isLowRegister(BaseReg)) {
+    if (MRI.isLowRegister(DestReg) && MRI.isLowRegister(BaseReg)) {
       // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
       unsigned Chunk = (1 << 3) - 1;
       unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
@@ -577,9 +466,10 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
 static void emitSPUpdate(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg,
-                         bool isThumb, const TargetInstrInfo &TII) {
+                         bool isThumb, const TargetInstrInfo &TII,
+                         const ARMRegisterInfo& MRI) {
   if (isThumb)
-    emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII);
+    emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, MRI);
   else
     emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes,
                             Pred, PredReg, TII);
@@ -610,12 +500,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
         // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
         unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg();
-        emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII);
+        emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII, *this);
       } else {
         // Note: PredReg is operand 3 for ADJCALLSTACKUP.
         unsigned PredReg = isThumb ? 0 : Old->getOperand(3).getReg();
         assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
-        emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII);
+        emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII, *this);
       }
     }
   }
@@ -627,7 +517,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 static void emitThumbConstant(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator &MBBI,
                               unsigned DestReg, int Imm,
-                              const TargetInstrInfo &TII) {
+                              const TargetInstrInfo &TII,
+                              const ARMRegisterInfo& MRI) {
   bool isSub = Imm < 0;
   if (isSub) Imm = -Imm;
 
@@ -636,7 +527,7 @@ static void emitThumbConstant(MachineBasicBlock &MBB,
   Imm -= ThisVal;
   BuildMI(MBB, MBBI, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
   if (Imm > 0)
-    emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII);
+    emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI);
   if (isSub)
     BuildMI(MBB, MBBI, TII.get(ARM::tNEG), DestReg)
       .addReg(DestReg, false, false, true);
@@ -770,7 +661,7 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
         // MI would expand into a large number of instructions. Don't try to
         // simplify the immediate.
         if (NumMIs > 2) {
-          emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII);
+          emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, *this);
           MBB.erase(II);
           return;
         }
@@ -783,12 +674,12 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
           MI.getOperand(i+1).ChangeToImmediate(Mask);
           Offset = (Offset - Mask * Scale);
           MachineBasicBlock::iterator NII = next(II);
-          emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII);
+          emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, *this);
         } else {
           // Translate r0 = add sp, -imm to
           // r0 = -imm (this is then translated into a series of instructons)
           // r0 = add r0, sp
-          emitThumbConstant(MBB, II, DestReg, Offset, TII);
+          emitThumbConstant(MBB, II, DestReg, Offset, TII, *this);
           MI.setInstrDescriptor(TII.get(ARM::tADDhirr));
           MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
           MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
@@ -891,13 +782,14 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       bool UseRR = false;
       if (Opcode == ARM::tRestore) {
         if (FrameReg == ARM::SP)
-          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,Offset,false,TII);
+          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                   Offset, false, TII, *this);
         else {
           emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, TII, true);
           UseRR = true;
         }
       } else
-        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII);
+        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, *this);
       MI.setInstrDescriptor(TII.get(ARM::tLDR));
       MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
       if (UseRR)
@@ -927,13 +819,14 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
         .addReg(ARM::R3, false, false, true);
       if (Opcode == ARM::tSpill) {
         if (FrameReg == ARM::SP)
-          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,Offset,false,TII);
+          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                   Offset, false, TII, *this);
        else {
           emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, TII, true);
           UseRR = true;
         }
       } else
-        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII);
+        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, *this);
       MI.setInstrDescriptor(TII.get(ARM::tSTR));
       MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
       if (UseRR)  // Use [reg, reg] addrmode.
@@ -1266,11 +1159,11 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
   int FramePtrSpillFI = 0;
 
   if (VARegSaveSize)
-    emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII);
+    emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII, *this);
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes != 0)
-      emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII);
+      emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this);
     return;
   }
 
@@ -1310,7 +1203,7 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
   if (!isThumb) {
     // Build the new SUBri to adjust SP for integer callee-save spill area 1.
-    emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII);
+    emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII, *this);
     movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
   } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH)
     ++MBBI;
@@ -1326,11 +1219,11 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
   if (!isThumb) {
     // Build the new SUBri to adjust SP for integer callee-save spill area 2.
-    emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII);
+    emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII, *this);
 
     // Build the new SUBri to adjust SP for FP callee-save spill area.
     movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
-    emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII);
+    emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII, *this);
   }
 
   // Determine starting offsets of spill areas.
@@ -1347,7 +1240,7 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
     // Insert it after all the callee-save spills.
     if (!isThumb)
       movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
-    emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII);
+    emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this);
   }
 
   if(STI.isTargetELF() && hasFP(MF)) {
@@ -1390,7 +1283,7 @@ void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
   int NumBytes = (int)MFI->getStackSize();
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes != 0)
-      emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII);
+      emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this);
   } else {
     // Unwind MBBI to point to first LDR / FLDD.
     const unsigned *CSRegs = getCalleeSavedRegs();
@@ -1412,7 +1305,8 @@ void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
       // Reset SP based on frame pointer only if the stack frame extends beyond
       // frame pointer stack slot or target is ELF and the function has FP.
       if (NumBytes)
-        emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, TII);
+        emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
+                                  TII, *this);
       else
         BuildMI(MBB, MBBI, TII.get(ARM::tMOVr), ARM::SP).addReg(FramePtr);
     } else {
@@ -1420,9 +1314,9 @@ void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
           &MBB.front() != MBBI &&
           prior(MBBI)->getOpcode() == ARM::tPOP) {
         MachineBasicBlock::iterator PMBBI = prior(MBBI);
-        emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII);
+        emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this);
       } else
-        emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII);
+        emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this);
     }
   } else {
     // Darwin ABI requires FP to point to the stack slot that contains the
@@ -1443,23 +1337,23 @@ void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
       BuildMI(MBB, MBBI, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr)
         .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
     } else if (NumBytes) {
-      emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII);
+      emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII, *this);
     }
 
     // Move SP to start of integer callee save spill area 2.
     movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
     emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0,
-                 false, TII);
+                 false, TII, *this);
 
     // Move SP to start of integer callee save spill area 1.
     movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
     emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0,
-                 false, TII);
+                 false, TII, *this);
 
     // Move SP to SP upon entry to the function.
     movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
     emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0,
-                 false, TII);
+                 false, TII, *this);
   }
 }
 
@@ -1469,7 +1363,7 @@ void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
     // FIXME: Verify this is still ok when R3 is no longer being reserved.
     BuildMI(MBB, MBBI, TII.get(ARM::tPOP)).addReg(ARM::R3);
 
-    emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII);
+    emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII, *this);
 
     if (isThumb) {
       BuildMI(MBB, MBBI, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 8ca7a9d..cf18e86 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -40,19 +40,6 @@ public:
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      unsigned DestReg, const MachineInstr *Orig) const;
 
-  MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                  SmallVectorImpl<unsigned> &Ops,
-                                  int FrameIndex) const;
-
-  MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                  SmallVectorImpl<unsigned> &Ops,
-                                  MachineInstr* LoadMI) const {
-    return 0;
-  }
-
-  bool canFoldMemoryOperand(MachineInstr *MI,
-                            SmallVectorImpl<unsigned> &Ops) const;
-
   const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
   const TargetRegisterClass* const*
@@ -90,6 +77,8 @@ public:
   unsigned getEHHandlerRegister() const;
 
   int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+  bool isLowRegister(unsigned Reg) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 7c89ec8..abd7e33 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -250,6 +250,43 @@ void AlphaInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   NewMIs.push_back(MIB);
 }
 
+MachineInstr *AlphaInstrInfo::foldMemoryOperand(MachineInstr *MI,
+                                                SmallVectorImpl<unsigned> &Ops,
+                                                int FrameIndex) const {
+  if (Ops.size() != 1) return NULL;
+
+  // Make sure this is a reg-reg copy.
+  unsigned Opc = MI->getOpcode();
+
+  MachineInstr *NewMI = NULL;
+  switch(Opc) {
+  default:
+    break;
+  case Alpha::BISr:
+  case Alpha::CPYSS:
+  case Alpha::CPYST:
+    if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+      if (Ops[0] == 0) {  // move -> store
+        unsigned InReg = MI->getOperand(1).getReg();
+        Opc = (Opc == Alpha::BISr) ? Alpha::STQ :
+          ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT);
+        NewMI = BuildMI(get(Opc)).addReg(InReg).addFrameIndex(FrameIndex)
+          .addReg(Alpha::F31);
+      } else {            // move -> load
+        unsigned OutReg = MI->getOperand(0).getReg();
+        Opc = (Opc == Alpha::BISr) ? Alpha::LDQ :
+          ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT);
+        NewMI = BuildMI(get(Opc), OutReg).addFrameIndex(FrameIndex)
+          .addReg(Alpha::F31);
+      }
+    }
+    break;
+  }
+  if (NewMI)
+    NewMI->copyKillDeadInfo(MI);
+  return NewMI;
+}
+
 static unsigned AlphaRevCondCode(unsigned Opcode) {
   switch (Opcode) {
   case Alpha::BEQ: return Alpha::BNE;
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index 48e168f..3477ae0 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -66,6 +66,17 @@ public:
                                SmallVectorImpl<MachineOperand> &Addr,
                                const TargetRegisterClass *RC,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          int FrameIndex) const;
+
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          MachineInstr* LoadMI) const {
+    return 0;
+  }
+
   bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      std::vector<MachineOperand> &Cond) const;
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index b5b77fe..d749d35 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -58,43 +58,6 @@ AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
 {
 }
 
-MachineInstr *AlphaRegisterInfo::foldMemoryOperand(MachineInstr *MI,
-                                                   SmallVectorImpl<unsigned> &Ops,
-                                                   int FrameIndex) const {
-  if (Ops.size() != 1) return NULL;
-
-  // Make sure this is a reg-reg copy.
-  unsigned Opc = MI->getOpcode();
-
-  MachineInstr *NewMI = NULL;
-  switch(Opc) {
-  default:
-    break;
-  case Alpha::BISr:
-  case Alpha::CPYSS:
-  case Alpha::CPYST:
-    if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
-      if (Ops[0] == 0) {  // move -> store
-        unsigned InReg = MI->getOperand(1).getReg();
-        Opc = (Opc == Alpha::BISr) ? Alpha::STQ :
-          ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT);
-        NewMI = BuildMI(TII.get(Opc)).addReg(InReg).addFrameIndex(FrameIndex)
-          .addReg(Alpha::F31);
-      } else {            // load -> move
-        unsigned OutReg = MI->getOperand(0).getReg();
-        Opc = (Opc == Alpha::BISr) ? Alpha::LDQ :
-          ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT);
-        NewMI = BuildMI(TII.get(Opc), OutReg).addFrameIndex(FrameIndex)
-          .addReg(Alpha::F31);
-      }
-    }
-    break;
-  }
-  if (NewMI)
-    NewMI->copyKillDeadInfo(MI);
-  return 0;
-}
-
 void AlphaRegisterInfo::reMaterialize(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned DestReg,
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index deabc7f..7a32bf5 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -28,16 +28,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
   AlphaRegisterInfo(const TargetInstrInfo &tii);
 
   /// Code Generation virtual methods...
-  MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                  SmallVectorImpl<unsigned> &Ops,
-                                  int FrameIndex) const;
-
-  MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                  SmallVectorImpl<unsigned> &Ops,
-                                  MachineInstr* LoadMI) const {
-    return 0;
-  }
-
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      unsigned DestReg, const MachineInstr *Orig) const;
 
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index c7cbd9b..e9b263f 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -388,3 +388,42 @@ void SPUInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   }
 }
 
+/// foldMemoryOperand - SPU, like PPC, can only fold spills into
+/// copy instructions, turning them into load/store instructions.
+MachineInstr *
+SPUInstrInfo::foldMemoryOperand(MachineInstr *MI,
+                                SmallVectorImpl<unsigned> &Ops,
+                                int FrameIndex) const
+{
+#if SOMEDAY_SCOTT_LOOKS_AT_ME_AGAIN
+  if (Ops.size() != 1) return NULL;
+
+  unsigned OpNum = Ops[0];
+  unsigned Opc = MI->getOpcode();
+  MachineInstr *NewMI = 0;
+
+  if ((Opc == SPU::ORr32
+       || Opc == SPU::ORv4i32)
+      && MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+    if (OpNum == 0) {  // move -> store
+      unsigned InReg = MI->getOperand(1).getReg();
+      if (FrameIndex < SPUFrameInfo::maxFrameOffset()) {
+        NewMI = addFrameReference(BuildMI(TII.get(SPU::STQDr32)).addReg(InReg),
+                                  FrameIndex);
+      }
+    } else {           // move -> load
+      unsigned OutReg = MI->getOperand(0).getReg();
+      Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset()) ? SPU::STQDr32 : SPU::STQXr32;
+      NewMI = addFrameReference(BuildMI(TII.get(Opc), OutReg), FrameIndex);
+    }
+  }
+
+  if (NewMI)
+    NewMI->copyKillDeadInfo(MI);
+
+  return NewMI;
+#else
+  return 0;
+#endif
+}
+
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index 22581fc..e2ecf9b 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -74,7 +74,19 @@ namespace llvm {
       virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                    SmallVectorImpl<MachineOperand> &Addr,
                                    const TargetRegisterClass *RC,
-                                   SmallVectorImpl<MachineInstr*> &NewMIs) const;
+                                  SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+      //! Fold spills into load/store instructions
+      virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                              SmallVectorImpl<unsigned> &Ops,
+                                              int FrameIndex) const;
+
+      //! Fold any load/store to an operand
+      virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                              SmallVectorImpl<unsigned> &Ops,
+                                              MachineInstr* LoadMI) const {
+        return 0;
+      }
    };
 }
 
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 3a0565d..90606cb 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -295,54 +295,6 @@ BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-/// foldMemoryOperand - SPU, like PPC, can only fold spills into
-/// copy instructions, turning them into load/store instructions.
-MachineInstr *
-SPURegisterInfo::foldMemoryOperand(MachineInstr *MI,
-                                   SmallVectorImpl<unsigned> &Ops,
-                                   int FrameIndex) const
-{
-#if SOMEDAY_SCOTT_LOOKS_AT_ME_AGAIN
-  if (Ops.size() != 1) return NULL;
-
-  unsigned OpNum = Ops[0];
-  unsigned Opc = MI->getOpcode();
-  MachineInstr *NewMI = 0;
-
-  if ((Opc == SPU::ORr32
-       || Opc == SPU::ORv4i32)
-      && MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
-    if (OpNum == 0) {  // move -> store
-      unsigned InReg = MI->getOperand(1).getReg();
-      if (FrameIndex < SPUFrameInfo::maxFrameOffset()) {
-        NewMI = addFrameReference(BuildMI(TII.get(SPU::STQDr32)).addReg(InReg),
-                                  FrameIndex);
-      }
-    } else {           // move -> load
-      unsigned OutReg = MI->getOperand(0).getReg();
-      Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset()) ? SPU::STQDr32 : SPU::STQXr32;
-      NewMI = addFrameReference(BuildMI(TII.get(Opc), OutReg), FrameIndex);
-    }
-  }
-
-  if (NewMI)
-    NewMI->copyKillDeadInfo(MI);
-
-  return NewMI;
-#else
-  return 0;
-#endif
-}
-
-/// General-purpose load/store fold to operand code
-MachineInstr *
-SPURegisterInfo::foldMemoryOperand(MachineInstr *MI,
-                                   SmallVectorImpl<unsigned> &Ops,
-                                   MachineInstr *LoadMI) const
-{
-  return 0;
-}
-
 //===----------------------------------------------------------------------===//
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index c5df762..b806e80 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -42,16 +42,6 @@ namespace llvm {
     void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                        unsigned DestReg, const MachineInstr *Orig) const;
 
-    //! Fold spills into load/store instructions
-    virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                            SmallVectorImpl<unsigned> &Ops,
-                                            int FrameIndex) const;
-
-    //! Fold any load/store to an operand
-    virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                            SmallVectorImpl<unsigned> &Ops,
-                                            MachineInstr* LoadMI) const;
-
     //! Return the array of callee-saved registers
     virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
 
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 30d6805..130bfb8 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -371,6 +371,37 @@ void MipsInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   return;
 }
 
+MachineInstr *MipsInstrInfo::
+foldMemoryOperand(MachineInstr* MI,
+                  SmallVectorImpl<unsigned> &Ops, int FI) const
+{
+  if (Ops.size() != 1) return NULL;
+
+  MachineInstr *NewMI = NULL;
+
+  switch (MI->getOpcode())
+  {
+  case Mips::ADDu:
+    if ((MI->getOperand(0).isRegister()) &&
+        (MI->getOperand(1).isRegister()) &&
+        (MI->getOperand(1).getReg() == Mips::ZERO) &&
+        (MI->getOperand(2).isRegister()))
+    {
+      if (Ops[0] == 0)  // COPY -> STORE
+        NewMI = BuildMI(get(Mips::SW)).addFrameIndex(FI)
+                .addImm(0).addReg(MI->getOperand(2).getReg());
+      else              // COPY -> LOAD
+        NewMI = BuildMI(get(Mips::LW), MI->getOperand(0)
+                .getReg()).addImm(0).addFrameIndex(FI);
+    }
+    break;
+  }
+
+  if (NewMI)
+    NewMI->copyKillDeadInfo(MI);
+  return NewMI;
+}
+
 unsigned MipsInstrInfo::
 RemoveBranch(MachineBasicBlock &MBB) const
 {
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 2d21083..69ab795 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -105,6 +105,17 @@ public:
                                SmallVectorImpl<MachineOperand> &Addr,
                                const TargetRegisterClass *RC,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          int FrameIndex) const;
+
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          MachineInstr* LoadMI) const {
+    return 0;
+  }
+
   virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
   virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
 
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 2988eea..b06ccd8 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -93,37 +93,6 @@ void MipsRegisterInfo::reMaterialize(MachineBasicBlock &MBB,
   MBB.insert(I, MI);
 }
 
-MachineInstr *MipsRegisterInfo::
-foldMemoryOperand(MachineInstr* MI,
-                  SmallVectorImpl<unsigned> &Ops, int FI) const
-{
-  if (Ops.size() != 1) return NULL;
-
-  MachineInstr *NewMI = NULL;
-
-  switch (MI->getOpcode())
-  {
-  case Mips::ADDu:
-    if ((MI->getOperand(0).isRegister()) &&
-        (MI->getOperand(1).isRegister()) &&
-        (MI->getOperand(1).getReg() == Mips::ZERO) &&
-        (MI->getOperand(2).isRegister()))
-    {
-      if (Ops[0] == 0)  // COPY -> STORE
-        NewMI = BuildMI(TII.get(Mips::SW)).addFrameIndex(FI)
-                .addImm(0).addReg(MI->getOperand(2).getReg());
-      else              // COPY -> LOAD
-        NewMI = BuildMI(TII.get(Mips::LW), MI->getOperand(0)
-                .getReg()).addImm(0).addFrameIndex(FI);
-    }
-    break;
-  }
-
-  if (NewMI)
-    NewMI->copyKillDeadInfo(MI);
-  return NewMI;
-}
-
 //===----------------------------------------------------------------------===//
 //
 // Callee Saved Registers methods
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index b4d7d6c..2181cdf 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -35,16 +35,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      unsigned DestReg, const MachineInstr *Orig) const;
 
-  MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                  SmallVectorImpl<unsigned> &Ops,
-                                  int FrameIndex) const;
-
-  MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                  SmallVectorImpl<unsigned> &Ops,
-                                  MachineInstr* LoadMI) const {
-    return 0;
-  }
-
   const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
 
   const TargetRegisterClass* const*
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2ad5e65..4d404a5 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -534,6 +534,85 @@ void PPCInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   return;
 }
 
+/// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
+/// copy instructions, turning them into load/store instructions.
+MachineInstr *PPCInstrInfo::foldMemoryOperand(MachineInstr *MI,
+                                              SmallVectorImpl<unsigned> &Ops,
+                                              int FrameIndex) const {
+  if (Ops.size() != 1) return NULL;
+
+  // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
+  // it takes more than one instruction to store it.
+  unsigned Opc = MI->getOpcode();
+  unsigned OpNum = Ops[0];
+
+  MachineInstr *NewMI = NULL;
+  if ((Opc == PPC::OR &&
+       MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
+    if (OpNum == 0) {  // move -> store
+      unsigned InReg = MI->getOperand(1).getReg();
+      NewMI = addFrameReference(BuildMI(get(PPC::STW)).addReg(InReg),
+                                FrameIndex);
+    } else {           // move -> load
+      unsigned OutReg = MI->getOperand(0).getReg();
+      NewMI = addFrameReference(BuildMI(get(PPC::LWZ), OutReg),
+                                FrameIndex);
+    }
+  } else if ((Opc == PPC::OR8 &&
+              MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
+    if (OpNum == 0) {  // move -> store
+      unsigned InReg = MI->getOperand(1).getReg();
+      NewMI = addFrameReference(BuildMI(get(PPC::STD)).addReg(InReg),
+                                FrameIndex);
+    } else {           // move -> load
+      unsigned OutReg = MI->getOperand(0).getReg();
+      NewMI = addFrameReference(BuildMI(get(PPC::LD), OutReg), FrameIndex);
+    }
+  } else if (Opc == PPC::FMRD) {
+    if (OpNum == 0) {  // move -> store
+      unsigned InReg = MI->getOperand(1).getReg();
+      NewMI = addFrameReference(BuildMI(get(PPC::STFD)).addReg(InReg),
+                                FrameIndex);
+    } else {           // move -> load
+      unsigned OutReg = MI->getOperand(0).getReg();
+      NewMI = addFrameReference(BuildMI(get(PPC::LFD), OutReg), FrameIndex);
+    }
+  } else if (Opc == PPC::FMRS) {
+    if (OpNum == 0) {  // move -> store
+      unsigned InReg = MI->getOperand(1).getReg();
+      NewMI = addFrameReference(BuildMI(get(PPC::STFS)).addReg(InReg),
+                                FrameIndex);
+    } else {           // move -> load
+      unsigned OutReg = MI->getOperand(0).getReg();
+      NewMI = addFrameReference(BuildMI(get(PPC::LFS), OutReg), FrameIndex);
+    }
+  }
+
+  if (NewMI)
+    NewMI->copyKillDeadInfo(MI);
+  return NewMI;
+}
+
+bool PPCInstrInfo::canFoldMemoryOperand(MachineInstr *MI,
+                                        SmallVectorImpl<unsigned> &Ops) const {
+  if (Ops.size() != 1) return false;
+
+  // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
+  // it takes more than one instruction to store it.
+  unsigned Opc = MI->getOpcode();
+
+  if ((Opc == PPC::OR &&
+       MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
+    return true;
+  else if ((Opc == PPC::OR8 &&
+            MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
+    return true;
+  else if (Opc == PPC::FMRD || Opc == PPC::FMRS)
+    return true;
+
+  return false;
+}
+
 bool PPCInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
   if (MBB.empty()) return false;
 
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 7591f77..02d8bba 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -129,6 +129,21 @@ public:
                                const TargetRegisterClass *RC,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
 
+  /// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
+  /// copy instructions, turning them into load/store instructions.
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          int FrameIndex) const;
+
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          MachineInstr* LoadMI) const {
+    return 0;
+  }
+
+  virtual bool canFoldMemoryOperand(MachineInstr *MI,
+                                    SmallVectorImpl<unsigned> &Ops) const;
+
   virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
   virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
 };
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 0dc4ed2..7609e09 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -298,85 +298,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-/// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into
-/// copy instructions, turning them into load/store instructions.
-MachineInstr *PPCRegisterInfo::foldMemoryOperand(MachineInstr *MI,
-                                                 SmallVectorImpl<unsigned> &Ops,
-                                                 int FrameIndex) const {
-  if (Ops.size() != 1) return NULL;
-
-  // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
-  // it takes more than one instruction to store it.
-  unsigned Opc = MI->getOpcode();
-  unsigned OpNum = Ops[0];
-
-  MachineInstr *NewMI = NULL;
-  if ((Opc == PPC::OR &&
-       MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
-    if (OpNum == 0) {  // move -> store
-      unsigned InReg = MI->getOperand(1).getReg();
-      NewMI = addFrameReference(BuildMI(TII.get(PPC::STW)).addReg(InReg),
-                                FrameIndex);
-    } else {           // move -> load
-      unsigned OutReg = MI->getOperand(0).getReg();
-      NewMI = addFrameReference(BuildMI(TII.get(PPC::LWZ), OutReg),
-                                FrameIndex);
-    }
-  } else if ((Opc == PPC::OR8 &&
-              MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
-    if (OpNum == 0) {  // move -> store
-      unsigned InReg = MI->getOperand(1).getReg();
-      NewMI = addFrameReference(BuildMI(TII.get(PPC::STD)).addReg(InReg),
-                                FrameIndex);
-    } else {           // move -> load
-      unsigned OutReg = MI->getOperand(0).getReg();
-      NewMI = addFrameReference(BuildMI(TII.get(PPC::LD), OutReg), FrameIndex);
-    }
-  } else if (Opc == PPC::FMRD) {
-    if (OpNum == 0) {  // move -> store
-      unsigned InReg = MI->getOperand(1).getReg();
-      NewMI = addFrameReference(BuildMI(TII.get(PPC::STFD)).addReg(InReg),
-                                FrameIndex);
-    } else {           // move -> load
-      unsigned OutReg = MI->getOperand(0).getReg();
-      NewMI = addFrameReference(BuildMI(TII.get(PPC::LFD), OutReg), FrameIndex);
-    }
-  } else if (Opc == PPC::FMRS) {
-    if (OpNum == 0) {  // move -> store
-      unsigned InReg = MI->getOperand(1).getReg();
-      NewMI = addFrameReference(BuildMI(TII.get(PPC::STFS)).addReg(InReg),
-                                FrameIndex);
-    } else {           // move -> load
-      unsigned OutReg = MI->getOperand(0).getReg();
-      NewMI = addFrameReference(BuildMI(TII.get(PPC::LFS), OutReg), FrameIndex);
-    }
-  }
-
-  if (NewMI)
-    NewMI->copyKillDeadInfo(MI);
-  return NewMI;
-}
-
-bool PPCRegisterInfo::canFoldMemoryOperand(MachineInstr *MI,
-                                           SmallVectorImpl<unsigned> &Ops) const {
-  if (Ops.size() != 1) return false;
-
-  // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
-  // it takes more than one instruction to store it.
-  unsigned Opc = MI->getOpcode();
-
-  if ((Opc == PPC::OR &&
-       MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
-    return true;
-  else if ((Opc == PPC::OR8 &&
-            MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
-    return true;
-  else if (Opc == PPC::FMRD || Opc == PPC::FMRS)
-    return true;
-
-  return false;
-}
-
 //===----------------------------------------------------------------------===//
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 6a01e1f..2a84fc2 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -38,21 +38,6 @@ public:
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      unsigned DestReg, const MachineInstr *Orig) const;
 
-  /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into
-  /// copy instructions, turning them into load/store instructions.
-  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                          SmallVectorImpl<unsigned> &Ops,
-                                          int FrameIndex) const;
-
-  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                          SmallVectorImpl<unsigned> &Ops,
-                                          MachineInstr* LoadMI) const {
-    return 0;
-  }
-
-  virtual bool canFoldMemoryOperand(MachineInstr *MI,
-                                    SmallVectorImpl<unsigned> &Ops) const;
-
   const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
 
   const TargetRegisterClass* const*
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index baf6d8f..b587c28 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -221,3 +221,41 @@ void SparcInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   NewMIs.push_back(MIB);
   return;
 }
+
+MachineInstr *SparcInstrInfo::foldMemoryOperand(MachineInstr* MI,
+                                                SmallVectorImpl<unsigned> &Ops,
+                                                int FI) const {
+  if (Ops.size() != 1) return NULL;
+
+  unsigned OpNum = Ops[0];
+  bool isFloat = false;
+  MachineInstr *NewMI = NULL;
+  switch (MI->getOpcode()) {
+  case SP::ORrr:
+    if (MI->getOperand(1).isRegister() && MI->getOperand(1).getReg() == SP::G0 &&
+        MI->getOperand(0).isRegister() && MI->getOperand(2).isRegister()) {
+      if (OpNum == 0)  // COPY -> STORE
+        NewMI = BuildMI(get(SP::STri)).addFrameIndex(FI).addImm(0)
+          .addReg(MI->getOperand(2).getReg());
+      else             // COPY -> LOAD
+        NewMI = BuildMI(get(SP::LDri), MI->getOperand(0).getReg())
+          .addFrameIndex(FI).addImm(0);
+    }
+    break;
+  case SP::FMOVS:
+    isFloat = true;
+    // FALLTHROUGH
+  case SP::FMOVD:
+    if (OpNum == 0)  // COPY -> STORE
+      NewMI = BuildMI(get(isFloat ? SP::STFri : SP::STDFri))
+        .addFrameIndex(FI).addImm(0).addReg(MI->getOperand(1).getReg());
+    else             // COPY -> LOAD
+      NewMI = BuildMI(get(isFloat ? SP::LDFri : SP::LDDFri),
+                      MI->getOperand(0).getReg()).addFrameIndex(FI).addImm(0);
+    break;
+  }
+
+  if (NewMI)
+    NewMI->copyKillDeadInfo(MI);
+  return NewMI;
+}
\ No newline at end of file
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 7c4d056..0ed7fab 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -93,6 +93,16 @@ public:
                                SmallVectorImpl<MachineOperand> &Addr,
                                const TargetRegisterClass *RC,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          int FrameIndex) const;
+
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          MachineInstr* LoadMI) const {
+    return 0;
+  }
 };
 
 }
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 9ca7d45..6b93c68 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -39,44 +39,6 @@ void SparcRegisterInfo::reMaterialize(MachineBasicBlock &MBB,
   MBB.insert(I, MI);
 }
 
-MachineInstr *SparcRegisterInfo::foldMemoryOperand(MachineInstr* MI,
-                                                   SmallVectorImpl<unsigned> &Ops,
-                                                   int FI) const {
-  if (Ops.size() != 1) return NULL;
-
-  unsigned OpNum = Ops[0];
-  bool isFloat = false;
-  MachineInstr *NewMI = NULL;
-  switch (MI->getOpcode()) {
-  case SP::ORrr:
-    if (MI->getOperand(1).isRegister() && MI->getOperand(1).getReg() == SP::G0&&
-        MI->getOperand(0).isRegister() && MI->getOperand(2).isRegister()) {
-      if (OpNum == 0)  // COPY -> STORE
-        NewMI = BuildMI(TII.get(SP::STri)).addFrameIndex(FI).addImm(0)
-          .addReg(MI->getOperand(2).getReg());
-      else             // COPY -> LOAD
-        NewMI = BuildMI(TII.get(SP::LDri), MI->getOperand(0).getReg())
-          .addFrameIndex(FI).addImm(0);
-    }
-    break;
-  case SP::FMOVS:
-    isFloat = true;
-    // FALLTHROUGH
-  case SP::FMOVD:
-    if (OpNum == 0)  // COPY -> STORE
-      NewMI = BuildMI(TII.get(isFloat ? SP::STFri : SP::STDFri))
-        .addFrameIndex(FI).addImm(0).addReg(MI->getOperand(1).getReg());
-    else             // COPY -> LOAD
-      NewMI = BuildMI(TII.get(isFloat ? SP::LDFri : SP::LDDFri),
-                      MI->getOperand(0).getReg()).addFrameIndex(FI).addImm(0);
-    break;
-  }
-
-  if (NewMI)
-    NewMI->copyKillDeadInfo(MI);
-  return NewMI;
-}
-
 const unsigned*
 SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   static const unsigned CalleeSavedRegs[] = { 0 };
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index b9a6c50..2235de1 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -33,16 +33,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      unsigned DestReg, const MachineInstr *Orig) const;
 
-  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                          SmallVectorImpl<unsigned> &Ops,
-                                          int FrameIndex) const;
-
-  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                          SmallVectorImpl<unsigned> &Ops,
-                                          MachineInstr* LoadMI) const {
-    return 0;
-  }
-
   const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
   const TargetRegisterClass* const* getCalleeSavedRegClasses(
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index c19827e..42b25a9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -23,12 +23,614 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetOptions.h"
+
 using namespace llvm;
 
+namespace {
+  cl::opt<bool>
+  NoFusing("disable-spill-fusing",
+           cl::desc("Disable fusing of spill code into instructions"));
+  cl::opt<bool>
+  PrintFailedFusing("print-failed-fuse-candidates",
+                    cl::desc("Print instructions that the allocator wants to"
+                             " fuse, but the X86 backend currently can't"),
+                    cl::Hidden);
+}
+
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
     TM(tm), RI(tm, *this) {
+  SmallVector<unsigned, 16> AmbEntries;
+  static const unsigned OpTbl2Addr[][2] = {
+    { X86::ADC32ri,     X86::ADC32mi },
+    { X86::ADC32ri8,    X86::ADC32mi8 },
+    { X86::ADC32rr,     X86::ADC32mr },
+    { X86::ADC64ri32,   X86::ADC64mi32 },
+    { X86::ADC64ri8,    X86::ADC64mi8 },
+    { X86::ADC64rr,     X86::ADC64mr },
+    { X86::ADD16ri,     X86::ADD16mi },
+    { X86::ADD16ri8,    X86::ADD16mi8 },
+    { X86::ADD16rr,     X86::ADD16mr },
+    { X86::ADD32ri,     X86::ADD32mi },
+    { X86::ADD32ri8,    X86::ADD32mi8 },
+    { X86::ADD32rr,     X86::ADD32mr },
+    { X86::ADD64ri32,   X86::ADD64mi32 },
+    { X86::ADD64ri8,    X86::ADD64mi8 },
+    { X86::ADD64rr,     X86::ADD64mr },
+    { X86::ADD8ri,      X86::ADD8mi },
+    { X86::ADD8rr,      X86::ADD8mr },
+    { X86::AND16ri,     X86::AND16mi },
+    { X86::AND16ri8,    X86::AND16mi8 },
+    { X86::AND16rr,     X86::AND16mr },
+    { X86::AND32ri,     X86::AND32mi },
+    { X86::AND32ri8,    X86::AND32mi8 },
+    { X86::AND32rr,     X86::AND32mr },
+    { X86::AND64ri32,   X86::AND64mi32 },
+    { X86::AND64ri8,    X86::AND64mi8 },
+    { X86::AND64rr,     X86::AND64mr },
+    { X86::AND8ri,      X86::AND8mi },
+    { X86::AND8rr,      X86::AND8mr },
+    { X86::DEC16r,      X86::DEC16m },
+    { X86::DEC32r,      X86::DEC32m },
+    { X86::DEC64_16r,   X86::DEC64_16m },
+    { X86::DEC64_32r,   X86::DEC64_32m },
+    { X86::DEC64r,      X86::DEC64m },
+    { X86::DEC8r,       X86::DEC8m },
+    { X86::INC16r,      X86::INC16m },
+    { X86::INC32r,      X86::INC32m },
+    { X86::INC64_16r,   X86::INC64_16m },
+    { X86::INC64_32r,   X86::INC64_32m },
+    { X86::INC64r,      X86::INC64m },
+    { X86::INC8r,       X86::INC8m },
+    { X86::NEG16r,      X86::NEG16m },
+    { X86::NEG32r,      X86::NEG32m },
+    { X86::NEG64r,      X86::NEG64m },
+    { X86::NEG8r,       X86::NEG8m },
+    { X86::NOT16r,      X86::NOT16m },
+    { X86::NOT32r,      X86::NOT32m },
+    { X86::NOT64r,      X86::NOT64m },
+    { X86::NOT8r,       X86::NOT8m },
+    { X86::OR16ri,      X86::OR16mi },
+    { X86::OR16ri8,     X86::OR16mi8 },
+    { X86::OR16rr,      X86::OR16mr },
+    { X86::OR32ri,      X86::OR32mi },
+    { X86::OR32ri8,     X86::OR32mi8 },
+    { X86::OR32rr,      X86::OR32mr },
+    { X86::OR64ri32,    X86::OR64mi32 },
+    { X86::OR64ri8,     X86::OR64mi8 },
+    { X86::OR64rr,      X86::OR64mr },
+    { X86::OR8ri,       X86::OR8mi },
+    { X86::OR8rr,       X86::OR8mr },
+    { X86::ROL16r1,     X86::ROL16m1 },
+    { X86::ROL16rCL,    X86::ROL16mCL },
+    { X86::ROL16ri,     X86::ROL16mi },
+    { X86::ROL32r1,     X86::ROL32m1 },
+    { X86::ROL32rCL,    X86::ROL32mCL },
+    { X86::ROL32ri,     X86::ROL32mi },
+    { X86::ROL64r1,     X86::ROL64m1 },
+    { X86::ROL64rCL,    X86::ROL64mCL },
+    { X86::ROL64ri,     X86::ROL64mi },
+    { X86::ROL8r1,      X86::ROL8m1 },
+    { X86::ROL8rCL,     X86::ROL8mCL },
+    { X86::ROL8ri,      X86::ROL8mi },
+    { X86::ROR16r1,     X86::ROR16m1 },
+    { X86::ROR16rCL,    X86::ROR16mCL },
+    { X86::ROR16ri,     X86::ROR16mi },
+    { X86::ROR32r1,     X86::ROR32m1 },
+    { X86::ROR32rCL,    X86::ROR32mCL },
+    { X86::ROR32ri,     X86::ROR32mi },
+    { X86::ROR64r1,     X86::ROR64m1 },
+    { X86::ROR64rCL,    X86::ROR64mCL },
+    { X86::ROR64ri,     X86::ROR64mi },
+    { X86::ROR8r1,      X86::ROR8m1 },
+    { X86::ROR8rCL,     X86::ROR8mCL },
+    { X86::ROR8ri,      X86::ROR8mi },
+    { X86::SAR16r1,     X86::SAR16m1 },
+    { X86::SAR16rCL,    X86::SAR16mCL },
+    { X86::SAR16ri,     X86::SAR16mi },
+    { X86::SAR32r1,     X86::SAR32m1 },
+    { X86::SAR32rCL,    X86::SAR32mCL },
+    { X86::SAR32ri,     X86::SAR32mi },
+    { X86::SAR64r1,     X86::SAR64m1 },
+    { X86::SAR64rCL,    X86::SAR64mCL },
+    { X86::SAR64ri,     X86::SAR64mi },
+    { X86::SAR8r1,      X86::SAR8m1 },
+    { X86::SAR8rCL,     X86::SAR8mCL },
+    { X86::SAR8ri,      X86::SAR8mi },
+    { X86::SBB32ri,     X86::SBB32mi },
+    { X86::SBB32ri8,    X86::SBB32mi8 },
+    { X86::SBB32rr,     X86::SBB32mr },
+    { X86::SBB64ri32,   X86::SBB64mi32 },
+    { X86::SBB64ri8,    X86::SBB64mi8 },
+    { X86::SBB64rr,     X86::SBB64mr },
+    { X86::SHL16r1,     X86::SHL16m1 },
+    { X86::SHL16rCL,    X86::SHL16mCL },
+    { X86::SHL16ri,     X86::SHL16mi },
+    { X86::SHL32r1,     X86::SHL32m1 },
+    { X86::SHL32rCL,    X86::SHL32mCL },
+    { X86::SHL32ri,     X86::SHL32mi },
+    { X86::SHL64r1,     X86::SHL64m1 },
+    { X86::SHL64rCL,    X86::SHL64mCL },
+    { X86::SHL64ri,     X86::SHL64mi },
+    { X86::SHL8r1,      X86::SHL8m1 },
+    { X86::SHL8rCL,     X86::SHL8mCL },
+    { X86::SHL8ri,      X86::SHL8mi },
+    { X86::SHLD16rrCL,  X86::SHLD16mrCL },
+    { X86::SHLD16rri8,  X86::SHLD16mri8 },
+    { X86::SHLD32rrCL,  X86::SHLD32mrCL },
+    { X86::SHLD32rri8,  X86::SHLD32mri8 },
+    { X86::SHLD64rrCL,  X86::SHLD64mrCL },
+    { X86::SHLD64rri8,  X86::SHLD64mri8 },
+    { X86::SHR16r1,     X86::SHR16m1 },
+    { X86::SHR16rCL,    X86::SHR16mCL },
+    { X86::SHR16ri,     X86::SHR16mi },
+    { X86::SHR32r1,     X86::SHR32m1 },
+    { X86::SHR32rCL,    X86::SHR32mCL },
+    { X86::SHR32ri,     X86::SHR32mi },
+    { X86::SHR64r1,     X86::SHR64m1 },
+    { X86::SHR64rCL,    X86::SHR64mCL },
+    { X86::SHR64ri,     X86::SHR64mi },
+    { X86::SHR8r1,      X86::SHR8m1 },
+    { X86::SHR8rCL,     X86::SHR8mCL },
+    { X86::SHR8ri,      X86::SHR8mi },
+    { X86::SHRD16rrCL,  X86::SHRD16mrCL },
+    { X86::SHRD16rri8,  X86::SHRD16mri8 },
+    { X86::SHRD32rrCL,  X86::SHRD32mrCL },
+    { X86::SHRD32rri8,  X86::SHRD32mri8 },
+    { X86::SHRD64rrCL,  X86::SHRD64mrCL },
+    { X86::SHRD64rri8,  X86::SHRD64mri8 },
+    { X86::SUB16ri,     X86::SUB16mi },
+    { X86::SUB16ri8,    X86::SUB16mi8 },
+    { X86::SUB16rr,     X86::SUB16mr },
+    { X86::SUB32ri,     X86::SUB32mi },
+    { X86::SUB32ri8,    X86::SUB32mi8 },
+    { X86::SUB32rr,     X86::SUB32mr },
+    { X86::SUB64ri32,   X86::SUB64mi32 },
+    { X86::SUB64ri8,    X86::SUB64mi8 },
+    { X86::SUB64rr,     X86::SUB64mr },
+    { X86::SUB8ri,      X86::SUB8mi },
+    { X86::SUB8rr,      X86::SUB8mr },
+    { X86::XOR16ri,     X86::XOR16mi },
+    { X86::XOR16ri8,    X86::XOR16mi8 },
+    { X86::XOR16rr,     X86::XOR16mr },
+    { X86::XOR32ri,     X86::XOR32mi },
+    { X86::XOR32ri8,    X86::XOR32mi8 },
+    { X86::XOR32rr,     X86::XOR32mr },
+    { X86::XOR64ri32,   X86::XOR64mi32 },
+    { X86::XOR64ri8,    X86::XOR64mi8 },
+    { X86::XOR64rr,     X86::XOR64mr },
+    { X86::XOR8ri,      X86::XOR8mi },
+    { X86::XOR8rr,      X86::XOR8mr }
+  };
+
+  for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
+    unsigned RegOp = OpTbl2Addr[i][0];
+    unsigned MemOp = OpTbl2Addr[i][1];
+    if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, MemOp)))
+      assert(false && "Duplicated entries?");
+    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0, folded load and store
+    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+                                                std::make_pair(RegOp, AuxInfo))))
+      AmbEntries.push_back(MemOp);
+  }
+
+  // If the third value is 1, then it's folding either a load or a store.
+  static const unsigned OpTbl0[][3] = {
+    { X86::CALL32r,     X86::CALL32m, 1 },
+    { X86::CALL64r,     X86::CALL64m, 1 },
+    { X86::CMP16ri,     X86::CMP16mi, 1 },
+    { X86::CMP16ri8,    X86::CMP16mi8, 1 },
+    { X86::CMP32ri,     X86::CMP32mi, 1 },
+    { X86::CMP32ri8,    X86::CMP32mi8, 1 },
+    { X86::CMP64ri32,   X86::CMP64mi32, 1 },
+    { X86::CMP64ri8,    X86::CMP64mi8, 1 },
+    { X86::CMP8ri,      X86::CMP8mi, 1 },
+    { X86::DIV16r,      X86::DIV16m, 1 },
+    { X86::DIV32r,      X86::DIV32m, 1 },
+    { X86::DIV64r,      X86::DIV64m, 1 },
+    { X86::DIV8r,       X86::DIV8m, 1 },
+    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0 },
+    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0 },
+    { X86::IDIV16r,     X86::IDIV16m, 1 },
+    { X86::IDIV32r,     X86::IDIV32m, 1 },
+    { X86::IDIV64r,     X86::IDIV64m, 1 },
+    { X86::IDIV8r,      X86::IDIV8m, 1 },
+    { X86::IMUL16r,     X86::IMUL16m, 1 },
+    { X86::IMUL32r,     X86::IMUL32m, 1 },
+    { X86::IMUL64r,     X86::IMUL64m, 1 },
+    { X86::IMUL8r,      X86::IMUL8m, 1 },
+    { X86::JMP32r,      X86::JMP32m, 1 },
+    { X86::JMP64r,      X86::JMP64m, 1 },
+    { X86::MOV16ri,     X86::MOV16mi, 0 },
+    { X86::MOV16rr,     X86::MOV16mr, 0 },
+    { X86::MOV16to16_,  X86::MOV16_mr, 0 },
+    { X86::MOV32ri,     X86::MOV32mi, 0 },
+    { X86::MOV32rr,     X86::MOV32mr, 0 },
+    { X86::MOV32to32_,  X86::MOV32_mr, 0 },
+    { X86::MOV64ri32,   X86::MOV64mi32, 0 },
+    { X86::MOV64rr,     X86::MOV64mr, 0 },
+    { X86::MOV8ri,      X86::MOV8mi, 0 },
+    { X86::MOV8rr,      X86::MOV8mr, 0 },
+    { X86::MOVAPDrr,    X86::MOVAPDmr, 0 },
+    { X86::MOVAPSrr,    X86::MOVAPSmr, 0 },
+    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 },
+    { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 },
+    { X86::MOVPS2SSrr,  X86::MOVPS2SSmr, 0 },
+    { X86::MOVSDrr,     X86::MOVSDmr, 0 },
+    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 },
+    { X86::MOVSS2DIrr,  X86::MOVSS2DImr, 0 },
+    { X86::MOVSSrr,     X86::MOVSSmr, 0 },
+    { X86::MOVUPDrr,    X86::MOVUPDmr, 0 },
+    { X86::MOVUPSrr,    X86::MOVUPSmr, 0 },
+    { X86::MUL16r,      X86::MUL16m, 1 },
+    { X86::MUL32r,      X86::MUL32m, 1 },
+    { X86::MUL64r,      X86::MUL64m, 1 },
+    { X86::MUL8r,       X86::MUL8m, 1 },
+    { X86::SETAEr,      X86::SETAEm, 0 },
+    { X86::SETAr,       X86::SETAm, 0 },
+    { X86::SETBEr,      X86::SETBEm, 0 },
+    { X86::SETBr,       X86::SETBm, 0 },
+    { X86::SETEr,       X86::SETEm, 0 },
+    { X86::SETGEr,      X86::SETGEm, 0 },
+    { X86::SETGr,       X86::SETGm, 0 },
+    { X86::SETLEr,      X86::SETLEm, 0 },
+    { X86::SETLr,       X86::SETLm, 0 },
+    { X86::SETNEr,      X86::SETNEm, 0 },
+    { X86::SETNPr,      X86::SETNPm, 0 },
+    { X86::SETNSr,      X86::SETNSm, 0 },
+    { X86::SETPr,       X86::SETPm, 0 },
+    { X86::SETSr,       X86::SETSm, 0 },
+    { X86::TAILJMPr,    X86::TAILJMPm, 1 },
+    { X86::TEST16ri,    X86::TEST16mi, 1 },
+    { X86::TEST32ri,    X86::TEST32mi, 1 },
+    { X86::TEST64ri32,  X86::TEST64mi32, 1 },
+    { X86::TEST8ri,     X86::TEST8mi, 1 },
+    { X86::XCHG16rr,    X86::XCHG16mr, 0 },
+    { X86::XCHG32rr,    X86::XCHG32mr, 0 },
+    { X86::XCHG64rr,    X86::XCHG64mr, 0 },
+    { X86::XCHG8rr,     X86::XCHG8mr, 0 }
+  };
+
+  for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
+    unsigned RegOp = OpTbl0[i][0];
+    unsigned MemOp = OpTbl0[i][1];
+    if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, MemOp)))
+      assert(false && "Duplicated entries?");
+    unsigned FoldedLoad = OpTbl0[i][2];
+    // Index 0, folded load or store.
+    unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
+    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
+      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+                                                  std::make_pair(RegOp, AuxInfo))))
+        AmbEntries.push_back(MemOp);
+  }
+
+  static const unsigned OpTbl1[][2] = {
+    { X86::CMP16rr,         X86::CMP16rm },
+    { X86::CMP32rr,         X86::CMP32rm },
+    { X86::CMP64rr,         X86::CMP64rm },
+    { X86::CMP8rr,          X86::CMP8rm },
+    { X86::CVTSD2SSrr,      X86::CVTSD2SSrm },
+    { X86::CVTSI2SD64rr,    X86::CVTSI2SD64rm },
+    { X86::CVTSI2SDrr,      X86::CVTSI2SDrm },
+    { X86::CVTSI2SS64rr,    X86::CVTSI2SS64rm },
+    { X86::CVTSI2SSrr,      X86::CVTSI2SSrm },
+    { X86::CVTSS2SDrr,      X86::CVTSS2SDrm },
+    { X86::CVTTSD2SI64rr,   X86::CVTTSD2SI64rm },
+    { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm },
+    { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm },
+    { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm },
+    { X86::FsMOVAPDrr,      X86::MOVSDrm },
+    { X86::FsMOVAPSrr,      X86::MOVSSrm },
+    { X86::IMUL16rri,       X86::IMUL16rmi },
+    { X86::IMUL16rri8,      X86::IMUL16rmi8 },
+    { X86::IMUL32rri,       X86::IMUL32rmi },
+    { X86::IMUL32rri8,      X86::IMUL32rmi8 },
+    { X86::IMUL64rri32,     X86::IMUL64rmi32 },
+    { X86::IMUL64rri8,      X86::IMUL64rmi8 },
+    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm },
+    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm },
+    { X86::Int_COMISDrr,    X86::Int_COMISDrm },
+    { X86::Int_COMISSrr,    X86::Int_COMISSrm },
+    { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm },
+    { X86::Int_CVTDQ2PSrr,  X86::Int_CVTDQ2PSrm },
+    { X86::Int_CVTPD2DQrr,  X86::Int_CVTPD2DQrm },
+    { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm },
+    { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm },
+    { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm },
+    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
+    { X86::Int_CVTSD2SIrr,  X86::Int_CVTSD2SIrm },
+    { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm },
+    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
+    { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm },
+    { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
+    { X86::Int_CVTSI2SSrr,  X86::Int_CVTSI2SSrm },
+    { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm },
+    { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
+    { X86::Int_CVTSS2SIrr,  X86::Int_CVTSS2SIrm },
+    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
+    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
+    { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
+    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
+    { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
+    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
+    { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm },
+    { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm },
+    { X86::MOV16rr,         X86::MOV16rm },
+    { X86::MOV16to16_,      X86::MOV16_rm },
+    { X86::MOV32rr,         X86::MOV32rm },
+    { X86::MOV32to32_,      X86::MOV32_rm },
+    { X86::MOV64rr,         X86::MOV64rm },
+    { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm },
+    { X86::MOV64toSDrr,
X86::MOV64toSDrm }, + { X86::MOV8rr, X86::MOV8rm }, + { X86::MOVAPDrr, X86::MOVAPDrm }, + { X86::MOVAPSrr, X86::MOVAPSrm }, + { X86::MOVDDUPrr, X86::MOVDDUPrm }, + { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm }, + { X86::MOVSD2PDrr, X86::MOVSD2PDrm }, + { X86::MOVSDrr, X86::MOVSDrm }, + { X86::MOVSHDUPrr, X86::MOVSHDUPrm }, + { X86::MOVSLDUPrr, X86::MOVSLDUPrm }, + { X86::MOVSS2PSrr, X86::MOVSS2PSrm }, + { X86::MOVSSrr, X86::MOVSSrm }, + { X86::MOVSX16rr8, X86::MOVSX16rm8 }, + { X86::MOVSX32rr16, X86::MOVSX32rm16 }, + { X86::MOVSX32rr8, X86::MOVSX32rm8 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8 }, + { X86::MOVUPDrr, X86::MOVUPDrm }, + { X86::MOVUPSrr, X86::MOVUPSrm }, + { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm }, + { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm }, + { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm }, + { X86::MOVZX16rr8, X86::MOVZX16rm8 }, + { X86::MOVZX32rr16, X86::MOVZX32rm16 }, + { X86::MOVZX32rr8, X86::MOVZX32rm8 }, + { X86::MOVZX64rr16, X86::MOVZX64rm16 }, + { X86::MOVZX64rr8, X86::MOVZX64rm8 }, + { X86::PSHUFDri, X86::PSHUFDmi }, + { X86::PSHUFHWri, X86::PSHUFHWmi }, + { X86::PSHUFLWri, X86::PSHUFLWmi }, + { X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 }, + { X86::RCPPSr, X86::RCPPSm }, + { X86::RCPPSr_Int, X86::RCPPSm_Int }, + { X86::RSQRTPSr, X86::RSQRTPSm }, + { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int }, + { X86::RSQRTSSr, X86::RSQRTSSm }, + { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int }, + { X86::SQRTPDr, X86::SQRTPDm }, + { X86::SQRTPDr_Int, X86::SQRTPDm_Int }, + { X86::SQRTPSr, X86::SQRTPSm }, + { X86::SQRTPSr_Int, X86::SQRTPSm_Int }, + { X86::SQRTSDr, X86::SQRTSDm }, + { X86::SQRTSDr_Int, X86::SQRTSDm_Int }, + { X86::SQRTSSr, X86::SQRTSSm }, + { X86::SQRTSSr_Int, X86::SQRTSSm_Int }, + { X86::TEST16rr, X86::TEST16rm }, + { X86::TEST32rr, X86::TEST32rm }, + { X86::TEST64rr, X86::TEST64rm }, + { X86::TEST8rr, X86::TEST8rm }, + // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 + { X86::UCOMISDrr, X86::UCOMISDrm }, + { X86::UCOMISSrr, X86::UCOMISSrm }, + { X86::XCHG16rr, X86::XCHG16rm }, + { X86::XCHG32rr, X86::XCHG32rm }, + { X86::XCHG64rr, X86::XCHG64rm }, + { X86::XCHG8rr, X86::XCHG8rm } + }; + + for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { + unsigned RegOp = OpTbl1[i][0]; + unsigned MemOp = OpTbl1[i][1]; + if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, MemOp))) + assert(false && "Duplicated entries?"); + unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load + if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) + if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, + std::make_pair(RegOp, AuxInfo)))) + AmbEntries.push_back(MemOp); + } + + static const unsigned OpTbl2[][2] = { + { X86::ADC32rr, X86::ADC32rm }, + { X86::ADC64rr, X86::ADC64rm }, + { X86::ADD16rr, X86::ADD16rm }, + { X86::ADD32rr, X86::ADD32rm }, + { X86::ADD64rr, X86::ADD64rm }, + { X86::ADD8rr, X86::ADD8rm }, + { X86::ADDPDrr, X86::ADDPDrm }, + { X86::ADDPSrr, X86::ADDPSrm }, + { X86::ADDSDrr, X86::ADDSDrm }, + { X86::ADDSSrr, X86::ADDSSrm }, + { X86::ADDSUBPDrr, X86::ADDSUBPDrm }, + { X86::ADDSUBPSrr, X86::ADDSUBPSrm }, + { X86::AND16rr, X86::AND16rm }, + { X86::AND32rr, X86::AND32rm }, + { X86::AND64rr, X86::AND64rm }, + { X86::AND8rr, X86::AND8rm }, + { X86::ANDNPDrr, X86::ANDNPDrm }, + { X86::ANDNPSrr, X86::ANDNPSrm }, + { X86::ANDPDrr, X86::ANDPDrm }, + { X86::ANDPSrr, X86::ANDPSrm }, + { X86::CMOVA16rr, X86::CMOVA16rm }, + { X86::CMOVA32rr, X86::CMOVA32rm }, + { 
X86::CMOVA64rr, X86::CMOVA64rm }, + { X86::CMOVAE16rr, X86::CMOVAE16rm }, + { X86::CMOVAE32rr, X86::CMOVAE32rm }, + { X86::CMOVAE64rr, X86::CMOVAE64rm }, + { X86::CMOVB16rr, X86::CMOVB16rm }, + { X86::CMOVB32rr, X86::CMOVB32rm }, + { X86::CMOVB64rr, X86::CMOVB64rm }, + { X86::CMOVBE16rr, X86::CMOVBE16rm }, + { X86::CMOVBE32rr, X86::CMOVBE32rm }, + { X86::CMOVBE64rr, X86::CMOVBE64rm }, + { X86::CMOVE16rr, X86::CMOVE16rm }, + { X86::CMOVE32rr, X86::CMOVE32rm }, + { X86::CMOVE64rr, X86::CMOVE64rm }, + { X86::CMOVG16rr, X86::CMOVG16rm }, + { X86::CMOVG32rr, X86::CMOVG32rm }, + { X86::CMOVG64rr, X86::CMOVG64rm }, + { X86::CMOVGE16rr, X86::CMOVGE16rm }, + { X86::CMOVGE32rr, X86::CMOVGE32rm }, + { X86::CMOVGE64rr, X86::CMOVGE64rm }, + { X86::CMOVL16rr, X86::CMOVL16rm }, + { X86::CMOVL32rr, X86::CMOVL32rm }, + { X86::CMOVL64rr, X86::CMOVL64rm }, + { X86::CMOVLE16rr, X86::CMOVLE16rm }, + { X86::CMOVLE32rr, X86::CMOVLE32rm }, + { X86::CMOVLE64rr, X86::CMOVLE64rm }, + { X86::CMOVNE16rr, X86::CMOVNE16rm }, + { X86::CMOVNE32rr, X86::CMOVNE32rm }, + { X86::CMOVNE64rr, X86::CMOVNE64rm }, + { X86::CMOVNP16rr, X86::CMOVNP16rm }, + { X86::CMOVNP32rr, X86::CMOVNP32rm }, + { X86::CMOVNP64rr, X86::CMOVNP64rm }, + { X86::CMOVNS16rr, X86::CMOVNS16rm }, + { X86::CMOVNS32rr, X86::CMOVNS32rm }, + { X86::CMOVNS64rr, X86::CMOVNS64rm }, + { X86::CMOVP16rr, X86::CMOVP16rm }, + { X86::CMOVP32rr, X86::CMOVP32rm }, + { X86::CMOVP64rr, X86::CMOVP64rm }, + { X86::CMOVS16rr, X86::CMOVS16rm }, + { X86::CMOVS32rr, X86::CMOVS32rm }, + { X86::CMOVS64rr, X86::CMOVS64rm }, + { X86::CMPPDrri, X86::CMPPDrmi }, + { X86::CMPPSrri, X86::CMPPSrmi }, + { X86::CMPSDrr, X86::CMPSDrm }, + { X86::CMPSSrr, X86::CMPSSrm }, + { X86::DIVPDrr, X86::DIVPDrm }, + { X86::DIVPSrr, X86::DIVPSrm }, + { X86::DIVSDrr, X86::DIVSDrm }, + { X86::DIVSSrr, X86::DIVSSrm }, + { X86::HADDPDrr, X86::HADDPDrm }, + { X86::HADDPSrr, X86::HADDPSrm }, + { X86::HSUBPDrr, X86::HSUBPDrm }, + { X86::HSUBPSrr, X86::HSUBPSrm }, + { X86::IMUL16rr, X86::IMUL16rm }, + { X86::IMUL32rr, X86::IMUL32rm }, + { X86::IMUL64rr, X86::IMUL64rm }, + { X86::MAXPDrr, X86::MAXPDrm }, + { X86::MAXPDrr_Int, X86::MAXPDrm_Int }, + { X86::MAXPSrr, X86::MAXPSrm }, + { X86::MAXPSrr_Int, X86::MAXPSrm_Int }, + { X86::MAXSDrr, X86::MAXSDrm }, + { X86::MAXSDrr_Int, X86::MAXSDrm_Int }, + { X86::MAXSSrr, X86::MAXSSrm }, + { X86::MAXSSrr_Int, X86::MAXSSrm_Int }, + { X86::MINPDrr, X86::MINPDrm }, + { X86::MINPDrr_Int, X86::MINPDrm_Int }, + { X86::MINPSrr, X86::MINPSrm }, + { X86::MINPSrr_Int, X86::MINPSrm_Int }, + { X86::MINSDrr, X86::MINSDrm }, + { X86::MINSDrr_Int, X86::MINSDrm_Int }, + { X86::MINSSrr, X86::MINSSrm }, + { X86::MINSSrr_Int, X86::MINSSrm_Int }, + { X86::MULPDrr, X86::MULPDrm }, + { X86::MULPSrr, X86::MULPSrm }, + { X86::MULSDrr, X86::MULSDrm }, + { X86::MULSSrr, X86::MULSSrm }, + { X86::OR16rr, X86::OR16rm }, + { X86::OR32rr, X86::OR32rm }, + { X86::OR64rr, X86::OR64rm }, + { X86::OR8rr, X86::OR8rm }, + { X86::ORPDrr, X86::ORPDrm }, + { X86::ORPSrr, X86::ORPSrm }, + { X86::PACKSSDWrr, X86::PACKSSDWrm }, + { X86::PACKSSWBrr, X86::PACKSSWBrm }, + { X86::PACKUSWBrr, X86::PACKUSWBrm }, + { X86::PADDBrr, X86::PADDBrm }, + { X86::PADDDrr, X86::PADDDrm }, + { X86::PADDQrr, X86::PADDQrm }, + { X86::PADDSBrr, X86::PADDSBrm }, + { X86::PADDSWrr, X86::PADDSWrm }, + { X86::PADDWrr, X86::PADDWrm }, + { X86::PANDNrr, X86::PANDNrm }, + { X86::PANDrr, X86::PANDrm }, + { X86::PAVGBrr, X86::PAVGBrm }, + { X86::PAVGWrr, X86::PAVGWrm }, + { X86::PCMPEQBrr, X86::PCMPEQBrm }, + { X86::PCMPEQDrr, X86::PCMPEQDrm 
},
+    { X86::PCMPEQWrr,       X86::PCMPEQWrm },
+    { X86::PCMPGTBrr,       X86::PCMPGTBrm },
+    { X86::PCMPGTDrr,       X86::PCMPGTDrm },
+    { X86::PCMPGTWrr,       X86::PCMPGTWrm },
+    { X86::PINSRWrri,       X86::PINSRWrmi },
+    { X86::PMADDWDrr,       X86::PMADDWDrm },
+    { X86::PMAXSWrr,        X86::PMAXSWrm },
+    { X86::PMAXUBrr,        X86::PMAXUBrm },
+    { X86::PMINSWrr,        X86::PMINSWrm },
+    { X86::PMINUBrr,        X86::PMINUBrm },
+    { X86::PMULHUWrr,       X86::PMULHUWrm },
+    { X86::PMULHWrr,        X86::PMULHWrm },
+    { X86::PMULLWrr,        X86::PMULLWrm },
+    { X86::PMULUDQrr,       X86::PMULUDQrm },
+    { X86::PORrr,           X86::PORrm },
+    { X86::PSADBWrr,        X86::PSADBWrm },
+    { X86::PSLLDrr,         X86::PSLLDrm },
+    { X86::PSLLQrr,         X86::PSLLQrm },
+    { X86::PSLLWrr,         X86::PSLLWrm },
+    { X86::PSRADrr,         X86::PSRADrm },
+    { X86::PSRAWrr,         X86::PSRAWrm },
+    { X86::PSRLDrr,         X86::PSRLDrm },
+    { X86::PSRLQrr,         X86::PSRLQrm },
+    { X86::PSRLWrr,         X86::PSRLWrm },
+    { X86::PSUBBrr,         X86::PSUBBrm },
+    { X86::PSUBDrr,         X86::PSUBDrm },
+    { X86::PSUBSBrr,        X86::PSUBSBrm },
+    { X86::PSUBSWrr,        X86::PSUBSWrm },
+    { X86::PSUBWrr,         X86::PSUBWrm },
+    { X86::PUNPCKHBWrr,     X86::PUNPCKHBWrm },
+    { X86::PUNPCKHDQrr,     X86::PUNPCKHDQrm },
+    { X86::PUNPCKHQDQrr,    X86::PUNPCKHQDQrm },
+    { X86::PUNPCKHWDrr,     X86::PUNPCKHWDrm },
+    { X86::PUNPCKLBWrr,     X86::PUNPCKLBWrm },
+    { X86::PUNPCKLDQrr,     X86::PUNPCKLDQrm },
+    { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm },
+    { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm },
+    { X86::PXORrr,          X86::PXORrm },
+    { X86::SBB32rr,         X86::SBB32rm },
+    { X86::SBB64rr,         X86::SBB64rm },
+    { X86::SHUFPDrri,       X86::SHUFPDrmi },
+    { X86::SHUFPSrri,       X86::SHUFPSrmi },
+    { X86::SUB16rr,         X86::SUB16rm },
+    { X86::SUB32rr,         X86::SUB32rm },
+    { X86::SUB64rr,         X86::SUB64rm },
+    { X86::SUB8rr,          X86::SUB8rm },
+    { X86::SUBPDrr,         X86::SUBPDrm },
+    { X86::SUBPSrr,         X86::SUBPSrm },
+    { X86::SUBSDrr,         X86::SUBSDrm },
+    { X86::SUBSSrr,         X86::SUBSSrm },
+    // FIXME: TEST*rr -> swapped operand of TEST*mr.
+    { X86::UNPCKHPDrr,      X86::UNPCKHPDrm },
+    { X86::UNPCKHPSrr,      X86::UNPCKHPSrm },
+    { X86::UNPCKLPDrr,      X86::UNPCKLPDrm },
+    { X86::UNPCKLPSrr,      X86::UNPCKLPSrm },
+    { X86::XOR16rr,         X86::XOR16rm },
+    { X86::XOR32rr,         X86::XOR32rm },
+    { X86::XOR64rr,         X86::XOR64rm },
+    { X86::XOR8rr,          X86::XOR8rm },
+    { X86::XORPDrr,         X86::XORPDrm },
+    { X86::XORPSrr,         X86::XORPSrm }
+  };
+
+  for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
+    unsigned RegOp = OpTbl2[i][0];
+    unsigned MemOp = OpTbl2[i][1];
+    if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, MemOp)))
+      assert(false && "Duplicated entries?");
+    unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load
+    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+                                                std::make_pair(RegOp, AuxInfo))))
+      AmbEntries.push_back(MemOp);
+  }
+
+  // Remove ambiguous entries.
+  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
 }
 
 bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
@@ -1026,6 +1628,435 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   return true;
 }
 
+static MachineInstr *FuseTwoAddrInst(unsigned Opcode,
+                                     SmallVector<MachineOperand,4> &MOs,
+                                     MachineInstr *MI, const TargetInstrInfo &TII) {
+  // Create the base instruction with the memory operand as the first part.
+  MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true);
+  MachineInstrBuilder MIB(NewMI);
+  unsigned NumAddrOps = MOs.size();
+  for (unsigned i = 0; i != NumAddrOps; ++i)
+    MIB = X86InstrAddOperand(MIB, MOs[i]);
+  if (NumAddrOps < 4)  // FrameIndex only
+    MIB.addImm(1).addReg(0).addImm(0);
+
+  // Loop over the rest of the ri operands, converting them over.
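+  // Sketch of the resulting operand layout (example operands, not taken from
+  // any particular function): a tied two-address instruction such as
+  //   %EAX = ADD32rr %EAX, %EBX        (operands 0 and 1 are tied)
+  // fused with a frame index and Opcode == X86::ADD32mr becomes
+  //   ADD32mr <fi#0>, 1, %reg0, 0, %EBX
+  // i.e. the tied def/use pair is replaced by the four address operands
+  // (base, scale, index, displacement) added above; the loops below copy
+  // over the remaining source operands, then any trailing implicit operands.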
+  unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    MachineOperand &MO = MI->getOperand(i+2);
+    MIB = X86InstrAddOperand(MIB, MO);
+  }
+  for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    MIB = X86InstrAddOperand(MIB, MO);
+  }
+  return MIB;
+}
+
+static MachineInstr *FuseInst(unsigned Opcode, unsigned OpNo,
+                              SmallVector<MachineOperand,4> &MOs,
+                              MachineInstr *MI, const TargetInstrInfo &TII) {
+  MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true);
+  MachineInstrBuilder MIB(NewMI);
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (i == OpNo) {
+      assert(MO.isRegister() && "Expected to fold into reg operand!");
+      unsigned NumAddrOps = MOs.size();
+      for (unsigned i = 0; i != NumAddrOps; ++i)
+        MIB = X86InstrAddOperand(MIB, MOs[i]);
+      if (NumAddrOps < 4)  // FrameIndex only
+        MIB.addImm(1).addReg(0).addImm(0);
+    } else {
+      MIB = X86InstrAddOperand(MIB, MO);
+    }
+  }
+  return MIB;
+}
+
+static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
+                                SmallVector<MachineOperand,4> &MOs,
+                                MachineInstr *MI) {
+  MachineInstrBuilder MIB = BuildMI(TII.get(Opcode));
+
+  unsigned NumAddrOps = MOs.size();
+  for (unsigned i = 0; i != NumAddrOps; ++i)
+    MIB = X86InstrAddOperand(MIB, MOs[i]);
+  if (NumAddrOps < 4)  // FrameIndex only
+    MIB.addImm(1).addReg(0).addImm(0);
+  return MIB.addImm(0);
+}
+
+MachineInstr*
+X86InstrInfo::foldMemoryOperand(MachineInstr *MI, unsigned i,
+                                SmallVector<MachineOperand,4> &MOs) const {
+  const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+  bool isTwoAddrFold = false;
+  unsigned NumOps = getNumOperands(MI->getOpcode());
+  bool isTwoAddr = NumOps > 1 &&
+    MI->getInstrDescriptor()->getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+  MachineInstr *NewMI = NULL;
+  // Folding a memory location into the two-address part of a two-address
+  // instruction is different from folding it in other places.  It requires
+  // replacing the *two* registers with the memory location.
+  if (isTwoAddr && NumOps >= 2 && i < 2 &&
+      MI->getOperand(0).isRegister() &&
+      MI->getOperand(1).isRegister() &&
+      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+    isTwoAddrFold = true;
+  } else if (i == 0) { // If operand 0
+    if (MI->getOpcode() == X86::MOV16r0)
+      NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
+    else if (MI->getOpcode() == X86::MOV32r0)
+      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+    else if (MI->getOpcode() == X86::MOV64r0)
+      NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+    else if (MI->getOpcode() == X86::MOV8r0)
+      NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
+    if (NewMI) {
+      NewMI->copyKillDeadInfo(MI);
+      return NewMI;
+    }
+
+    OpcodeTablePtr = &RegOp2MemOpTable0;
+  } else if (i == 1) {
+    OpcodeTablePtr = &RegOp2MemOpTable1;
+  } else if (i == 2) {
+    OpcodeTablePtr = &RegOp2MemOpTable2;
+  }
+
+  // If a table was selected...
+  if (OpcodeTablePtr) {
+    // Find the Opcode to fuse.
+    DenseMap<unsigned*, unsigned>::iterator I =
+      OpcodeTablePtr->find((unsigned*)MI->getOpcode());
+    if (I != OpcodeTablePtr->end()) {
+      if (isTwoAddrFold)
+        NewMI = FuseTwoAddrInst(I->second, MOs, MI, *this);
+      else
+        NewMI = FuseInst(I->second, i, MOs, MI, *this);
+      NewMI->copyKillDeadInfo(MI);
+      return NewMI;
+    }
+  }
+
+  // No fusion.
+  if (PrintFailedFusing)
+    cerr << "We failed to fuse ("
+         << ((i == 1) ? "r" : "s") << "): " << *MI;
+  return NULL;
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperand(MachineInstr *MI,
+                                              SmallVectorImpl<unsigned> &Ops,
+                                              int FrameIndex) const {
+  // Check switch flag.
+  if (NoFusing) return NULL;
+
+  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+    unsigned NewOpc = 0;
+    switch (MI->getOpcode()) {
+    default: return NULL;
+    case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
+    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
+    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
+    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+    }
+    // Change to CMPXXri r, 0 first.
+    MI->setInstrDescriptor(get(NewOpc));
+    MI->getOperand(1).ChangeToImmediate(0);
+  } else if (Ops.size() != 1)
+    return NULL;
+
+  SmallVector<MachineOperand,4> MOs;
+  MOs.push_back(MachineOperand::CreateFI(FrameIndex));
+  return foldMemoryOperand(MI, Ops[0], MOs);
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperand(MachineInstr *MI,
+                                              SmallVectorImpl<unsigned> &Ops,
+                                              MachineInstr *LoadMI) const {
+  // Check switch flag.
+  if (NoFusing) return NULL;
+
+  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+    unsigned NewOpc = 0;
+    switch (MI->getOpcode()) {
+    default: return NULL;
+    case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
+    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
+    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
+    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+    }
+    // Change to CMPXXri r, 0 first.
+    MI->setInstrDescriptor(get(NewOpc));
+    MI->getOperand(1).ChangeToImmediate(0);
+  } else if (Ops.size() != 1)
+    return NULL;
+
+  SmallVector<MachineOperand,4> MOs;
+  unsigned NumOps = getNumOperands(LoadMI->getOpcode());
+  for (unsigned i = NumOps - 4; i != NumOps; ++i)
+    MOs.push_back(LoadMI->getOperand(i));
+  return foldMemoryOperand(MI, Ops[0], MOs);
+}
+
+bool X86InstrInfo::canFoldMemoryOperand(MachineInstr *MI,
+                                        SmallVectorImpl<unsigned> &Ops) const {
+  // Check switch flag.
+  if (NoFusing) return false;
+
+  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+    switch (MI->getOpcode()) {
+    default: return false;
+    case X86::TEST8rr:
+    case X86::TEST16rr:
+    case X86::TEST32rr:
+    case X86::TEST64rr:
+      return true;
+    }
+  }
+
+  if (Ops.size() != 1)
+    return false;
+
+  unsigned OpNum = Ops[0];
+  unsigned Opc = MI->getOpcode();
+  unsigned NumOps = getNumOperands(Opc);
+  bool isTwoAddr = NumOps > 1 &&
+    getOperandConstraint(Opc, 1, TOI::TIED_TO) != -1;
+
+  // Folding a memory location into the two-address part of a two-address
+  // instruction is different from folding it in other places.  It requires
+  // replacing the *two* registers with the memory location.
+ const DenseMap *OpcodeTablePtr = NULL; + if (isTwoAddr && NumOps >= 2 && OpNum < 2) { + OpcodeTablePtr = &RegOp2MemOpTable2Addr; + } else if (OpNum == 0) { // If operand 0 + switch (Opc) { + case X86::MOV16r0: + case X86::MOV32r0: + case X86::MOV64r0: + case X86::MOV8r0: + return true; + default: break; + } + OpcodeTablePtr = &RegOp2MemOpTable0; + } else if (OpNum == 1) { + OpcodeTablePtr = &RegOp2MemOpTable1; + } else if (OpNum == 2) { + OpcodeTablePtr = &RegOp2MemOpTable2; + } + + if (OpcodeTablePtr) { + // Find the Opcode to fuse + DenseMap::iterator I = + OpcodeTablePtr->find((unsigned*)Opc); + if (I != OpcodeTablePtr->end()) + return true; + } + return false; +} + +bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, + unsigned Reg, bool UnfoldLoad, bool UnfoldStore, + SmallVectorImpl &NewMIs) const { + DenseMap >::iterator I = + MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); + if (I == MemOp2RegOpTable.end()) + return false; + unsigned Opc = I->second.first; + unsigned Index = I->second.second & 0xf; + bool FoldedLoad = I->second.second & (1 << 4); + bool FoldedStore = I->second.second & (1 << 5); + if (UnfoldLoad && !FoldedLoad) + return false; + UnfoldLoad &= FoldedLoad; + if (UnfoldStore && !FoldedStore) + return false; + UnfoldStore &= FoldedStore; + + const TargetInstrDescriptor &TID = get(Opc); + const TargetOperandInfo &TOI = TID.OpInfo[Index]; + const TargetRegisterClass *RC = (TOI.Flags & M_LOOK_UP_PTR_REG_CLASS) + ? getPointerRegClass() : RI.getRegClass(TOI.RegClass); + SmallVector AddrOps; + SmallVector BeforeOps; + SmallVector AfterOps; + SmallVector ImpOps; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (i >= Index && i < Index+4) + AddrOps.push_back(Op); + else if (Op.isRegister() && Op.isImplicit()) + ImpOps.push_back(Op); + else if (i < Index) + BeforeOps.push_back(Op); + else if (i > Index) + AfterOps.push_back(Op); + } + + // Emit the load instruction. + if (UnfoldLoad) { + loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs); + if (UnfoldStore) { + // Address operands cannot be marked isKill. + for (unsigned i = 1; i != 5; ++i) { + MachineOperand &MO = NewMIs[0]->getOperand(i); + if (MO.isRegister()) + MO.setIsKill(false); + } + } + } + + // Emit the data processing instruction. + MachineInstr *DataMI = new MachineInstr(TID, true); + MachineInstrBuilder MIB(DataMI); + + if (FoldedStore) + MIB.addReg(Reg, true); + for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) + MIB = X86InstrAddOperand(MIB, BeforeOps[i]); + if (FoldedLoad) + MIB.addReg(Reg); + for (unsigned i = 0, e = AfterOps.size(); i != e; ++i) + MIB = X86InstrAddOperand(MIB, AfterOps[i]); + for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) { + MachineOperand &MO = ImpOps[i]; + MIB.addReg(MO.getReg(), MO.isDef(), true, MO.isKill(), MO.isDead()); + } + // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 
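+  // The fold path above rewrites TESTrr as CMPri with an immediate of zero
+  // so that the register operand can be fused, e.g.
+  //   TEST32rr %EAX, %EAX  ->  CMP32ri %EAX, 0  ->  CMP32mi <mem>, 0.
+  // When such an instruction is unfolded, a CMPri with a zero immediate is
+  // really a register test, so it is converted back to the TESTrr form.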
+ unsigned NewOpc = 0; + switch (DataMI->getOpcode()) { + default: break; + case X86::CMP64ri32: + case X86::CMP32ri: + case X86::CMP16ri: + case X86::CMP8ri: { + MachineOperand &MO0 = DataMI->getOperand(0); + MachineOperand &MO1 = DataMI->getOperand(1); + if (MO1.getImm() == 0) { + switch (DataMI->getOpcode()) { + default: break; + case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; + case X86::CMP32ri: NewOpc = X86::TEST32rr; break; + case X86::CMP16ri: NewOpc = X86::TEST16rr; break; + case X86::CMP8ri: NewOpc = X86::TEST8rr; break; + } + DataMI->setInstrDescriptor(get(NewOpc)); + MO1.ChangeToRegister(MO0.getReg(), false); + } + } + } + NewMIs.push_back(DataMI); + + // Emit the store instruction. + if (UnfoldStore) { + const TargetOperandInfo &DstTOI = TID.OpInfo[0]; + const TargetRegisterClass *DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS) + ? getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); + storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs); + } + + return true; +} + +bool +X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl &NewNodes) const { + if (!N->isTargetOpcode()) + return false; + + DenseMap >::iterator I = + MemOp2RegOpTable.find((unsigned*)N->getTargetOpcode()); + if (I == MemOp2RegOpTable.end()) + return false; + unsigned Opc = I->second.first; + unsigned Index = I->second.second & 0xf; + bool FoldedLoad = I->second.second & (1 << 4); + bool FoldedStore = I->second.second & (1 << 5); + const TargetInstrDescriptor &TID = get(Opc); + const TargetOperandInfo &TOI = TID.OpInfo[Index]; + const TargetRegisterClass *RC = (TOI.Flags & M_LOOK_UP_PTR_REG_CLASS) + ? getPointerRegClass() : RI.getRegClass(TOI.RegClass); + std::vector AddrOps; + std::vector BeforeOps; + std::vector AfterOps; + unsigned NumOps = N->getNumOperands(); + for (unsigned i = 0; i != NumOps-1; ++i) { + SDOperand Op = N->getOperand(i); + if (i >= Index && i < Index+4) + AddrOps.push_back(Op); + else if (i < Index) + BeforeOps.push_back(Op); + else if (i > Index) + AfterOps.push_back(Op); + } + SDOperand Chain = N->getOperand(NumOps-1); + AddrOps.push_back(Chain); + + // Emit the load instruction. + SDNode *Load = 0; + if (FoldedLoad) { + MVT::ValueType VT = *RC->vt_begin(); + Load = DAG.getTargetNode(getLoadRegOpcode(RC, RI.getStackAlignment()), VT, + MVT::Other, &AddrOps[0], AddrOps.size()); + NewNodes.push_back(Load); + } + + // Emit the data processing instruction. + std::vector VTs; + const TargetRegisterClass *DstRC = 0; + if (TID.numDefs > 0) { + const TargetOperandInfo &DstTOI = TID.OpInfo[0]; + DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS) + ? getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); + VTs.push_back(*DstRC->vt_begin()); + } + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT::ValueType VT = N->getValueType(i); + if (VT != MVT::Other && i >= TID.numDefs) + VTs.push_back(VT); + } + if (Load) + BeforeOps.push_back(SDOperand(Load, 0)); + std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); + SDNode *NewNode= DAG.getTargetNode(Opc, VTs, &BeforeOps[0], BeforeOps.size()); + NewNodes.push_back(NewNode); + + // Emit the store instruction. 
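+  // The store consumes result 0 of the new data-processing node together
+  // with the original address operands and the incoming chain. For an opcode
+  // that folded both a load and a store (a read-modify-write form from
+  // OpTbl2Addr, e.g. ADD32mi), unfolding therefore returns three target
+  // nodes in NewNodes: the load, ADD32ri, and a store back to the same
+  // address.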
+  if (FoldedStore) {
+    AddrOps.pop_back();
+    AddrOps.push_back(SDOperand(NewNode, 0));
+    AddrOps.push_back(Chain);
+    SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(DstRC, RI.getStackAlignment()),
+                                      MVT::Other, &AddrOps[0], AddrOps.size());
+    NewNodes.push_back(Store);
+  }
+
+  return true;
+}
+
+unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+                                      bool UnfoldLoad, bool UnfoldStore) const {
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+    MemOp2RegOpTable.find((unsigned*)Opc);
+  if (I == MemOp2RegOpTable.end())
+    return 0;
+  bool FoldedLoad = I->second.second & (1 << 4);
+  bool FoldedStore = I->second.second & (1 << 5);
+  if (UnfoldLoad && !FoldedLoad)
+    return 0;
+  if (UnfoldStore && !FoldedStore)
+    return 0;
+  return I->second.first;
+}
+
 bool X86InstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
   if (MBB.empty()) return false;
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index d39b865..ccee43d 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -225,6 +225,19 @@ namespace X86II {
 class X86InstrInfo : public TargetInstrInfoImpl {
   X86TargetMachine &TM;
   const X86RegisterInfo RI;
+
+  /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+  /// RegOp2MemOpTable2 - Load / store folding opcode maps.
+  ///
+  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr;
+  DenseMap<unsigned*, unsigned> RegOp2MemOpTable0;
+  DenseMap<unsigned*, unsigned> RegOp2MemOpTable1;
+  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2;
+
+  /// MemOp2RegOpTable - Load / store unfolding opcode map.
+  ///
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> > MemOp2RegOpTable;
+
 public:
   X86InstrInfo(X86TargetMachine &tm);
 
@@ -305,6 +318,44 @@ public:
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI) const;
 
+  /// foldMemoryOperand - Attempt to fold a load or store of the specified
+  /// stack slot into the specified machine instruction for the specified
+  /// operand(s). If folding is possible, a new instruction with the operand
+  /// folded is returned; otherwise NULL is returned. The client is
+  /// responsible for removing the old instruction and inserting the new one
+  /// into the instruction stream.
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          int FrameIndex) const;
+
+  /// foldMemoryOperand - Same as the previous version except it allows folding
+  /// of any load and store from / to any address, not just from a specific
+  /// stack slot.
+  virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+                                          SmallVectorImpl<unsigned> &Ops,
+                                          MachineInstr* LoadMI) const;
+
+  /// canFoldMemoryOperand - Returns true if folding of the specified
+  /// load / store is possible.
+  virtual bool canFoldMemoryOperand(MachineInstr*,
+                                    SmallVectorImpl<unsigned> &) const;
+
+  /// unfoldMemoryOperand - Separate a single instruction which folded a load
+  /// or a store or a load and a store into two or more instructions. If this
+  /// is possible, returns true as well as the new instructions by reference.
+  virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+                                   unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+                                   SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+                                   SmallVectorImpl<SDNode*> &NewNodes) const;
+
+  /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new
+  /// instruction after load / store are unfolded from an instruction of the
+  /// specified opcode. It returns zero if the specified unfolding is not
+  /// possible.
+ virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, + bool UnfoldLoad, bool UnfoldStore) const; + virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const; virtual bool ReverseBranchCondition(std::vector &Cond) const; @@ -319,6 +370,11 @@ public: unsigned char getBaseOpcodeFor(MachineOpCode Opcode) const { return getBaseOpcodeFor(&get(Opcode)); } + +private: + MachineInstr* foldMemoryOperand(MachineInstr* MI, + unsigned OpNum, + SmallVector &MOs) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 49effcf..34a860c 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -33,22 +33,10 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; -namespace { - cl::opt - NoFusing("disable-spill-fusing", - cl::desc("Disable fusing of spill code into instructions")); - cl::opt - PrintFailedFusing("print-failed-fuse-candidates", - cl::desc("Print instructions that the allocator wants to" - " fuse, but the X86 backend currently can't"), - cl::Hidden); -} - X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) : X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP), @@ -66,596 +54,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, StackPtr = X86::ESP; FramePtr = X86::EBP; } - - SmallVector AmbEntries; - static const unsigned OpTbl2Addr[][2] = { - { X86::ADC32ri, X86::ADC32mi }, - { X86::ADC32ri8, X86::ADC32mi8 }, - { X86::ADC32rr, X86::ADC32mr }, - { X86::ADC64ri32, X86::ADC64mi32 }, - { X86::ADC64ri8, X86::ADC64mi8 }, - { X86::ADC64rr, X86::ADC64mr }, - { X86::ADD16ri, X86::ADD16mi }, - { X86::ADD16ri8, X86::ADD16mi8 }, - { X86::ADD16rr, X86::ADD16mr }, - { X86::ADD32ri, X86::ADD32mi }, - { X86::ADD32ri8, X86::ADD32mi8 }, - { X86::ADD32rr, X86::ADD32mr }, - { X86::ADD64ri32, X86::ADD64mi32 }, - { X86::ADD64ri8, X86::ADD64mi8 }, - { X86::ADD64rr, X86::ADD64mr }, - { X86::ADD8ri, X86::ADD8mi }, - { X86::ADD8rr, X86::ADD8mr }, - { X86::AND16ri, X86::AND16mi }, - { X86::AND16ri8, X86::AND16mi8 }, - { X86::AND16rr, X86::AND16mr }, - { X86::AND32ri, X86::AND32mi }, - { X86::AND32ri8, X86::AND32mi8 }, - { X86::AND32rr, X86::AND32mr }, - { X86::AND64ri32, X86::AND64mi32 }, - { X86::AND64ri8, X86::AND64mi8 }, - { X86::AND64rr, X86::AND64mr }, - { X86::AND8ri, X86::AND8mi }, - { X86::AND8rr, X86::AND8mr }, - { X86::DEC16r, X86::DEC16m }, - { X86::DEC32r, X86::DEC32m }, - { X86::DEC64_16r, X86::DEC64_16m }, - { X86::DEC64_32r, X86::DEC64_32m }, - { X86::DEC64r, X86::DEC64m }, - { X86::DEC8r, X86::DEC8m }, - { X86::INC16r, X86::INC16m }, - { X86::INC32r, X86::INC32m }, - { X86::INC64_16r, X86::INC64_16m }, - { X86::INC64_32r, X86::INC64_32m }, - { X86::INC64r, X86::INC64m }, - { X86::INC8r, X86::INC8m }, - { X86::NEG16r, X86::NEG16m }, - { X86::NEG32r, X86::NEG32m }, - { X86::NEG64r, X86::NEG64m }, - { X86::NEG8r, X86::NEG8m }, - { X86::NOT16r, X86::NOT16m }, - { X86::NOT32r, X86::NOT32m }, - { X86::NOT64r, X86::NOT64m }, - { X86::NOT8r, X86::NOT8m }, - { X86::OR16ri, X86::OR16mi }, - { X86::OR16ri8, X86::OR16mi8 }, - { X86::OR16rr, X86::OR16mr }, - { X86::OR32ri, X86::OR32mi }, - { X86::OR32ri8, X86::OR32mi8 }, - { X86::OR32rr, X86::OR32mr }, - { X86::OR64ri32, X86::OR64mi32 }, - { X86::OR64ri8, X86::OR64mi8 }, - { X86::OR64rr, X86::OR64mr }, - { 
X86::OR8ri, X86::OR8mi }, - { X86::OR8rr, X86::OR8mr }, - { X86::ROL16r1, X86::ROL16m1 }, - { X86::ROL16rCL, X86::ROL16mCL }, - { X86::ROL16ri, X86::ROL16mi }, - { X86::ROL32r1, X86::ROL32m1 }, - { X86::ROL32rCL, X86::ROL32mCL }, - { X86::ROL32ri, X86::ROL32mi }, - { X86::ROL64r1, X86::ROL64m1 }, - { X86::ROL64rCL, X86::ROL64mCL }, - { X86::ROL64ri, X86::ROL64mi }, - { X86::ROL8r1, X86::ROL8m1 }, - { X86::ROL8rCL, X86::ROL8mCL }, - { X86::ROL8ri, X86::ROL8mi }, - { X86::ROR16r1, X86::ROR16m1 }, - { X86::ROR16rCL, X86::ROR16mCL }, - { X86::ROR16ri, X86::ROR16mi }, - { X86::ROR32r1, X86::ROR32m1 }, - { X86::ROR32rCL, X86::ROR32mCL }, - { X86::ROR32ri, X86::ROR32mi }, - { X86::ROR64r1, X86::ROR64m1 }, - { X86::ROR64rCL, X86::ROR64mCL }, - { X86::ROR64ri, X86::ROR64mi }, - { X86::ROR8r1, X86::ROR8m1 }, - { X86::ROR8rCL, X86::ROR8mCL }, - { X86::ROR8ri, X86::ROR8mi }, - { X86::SAR16r1, X86::SAR16m1 }, - { X86::SAR16rCL, X86::SAR16mCL }, - { X86::SAR16ri, X86::SAR16mi }, - { X86::SAR32r1, X86::SAR32m1 }, - { X86::SAR32rCL, X86::SAR32mCL }, - { X86::SAR32ri, X86::SAR32mi }, - { X86::SAR64r1, X86::SAR64m1 }, - { X86::SAR64rCL, X86::SAR64mCL }, - { X86::SAR64ri, X86::SAR64mi }, - { X86::SAR8r1, X86::SAR8m1 }, - { X86::SAR8rCL, X86::SAR8mCL }, - { X86::SAR8ri, X86::SAR8mi }, - { X86::SBB32ri, X86::SBB32mi }, - { X86::SBB32ri8, X86::SBB32mi8 }, - { X86::SBB32rr, X86::SBB32mr }, - { X86::SBB64ri32, X86::SBB64mi32 }, - { X86::SBB64ri8, X86::SBB64mi8 }, - { X86::SBB64rr, X86::SBB64mr }, - { X86::SHL16r1, X86::SHL16m1 }, - { X86::SHL16rCL, X86::SHL16mCL }, - { X86::SHL16ri, X86::SHL16mi }, - { X86::SHL32r1, X86::SHL32m1 }, - { X86::SHL32rCL, X86::SHL32mCL }, - { X86::SHL32ri, X86::SHL32mi }, - { X86::SHL64r1, X86::SHL64m1 }, - { X86::SHL64rCL, X86::SHL64mCL }, - { X86::SHL64ri, X86::SHL64mi }, - { X86::SHL8r1, X86::SHL8m1 }, - { X86::SHL8rCL, X86::SHL8mCL }, - { X86::SHL8ri, X86::SHL8mi }, - { X86::SHLD16rrCL, X86::SHLD16mrCL }, - { X86::SHLD16rri8, X86::SHLD16mri8 }, - { X86::SHLD32rrCL, X86::SHLD32mrCL }, - { X86::SHLD32rri8, X86::SHLD32mri8 }, - { X86::SHLD64rrCL, X86::SHLD64mrCL }, - { X86::SHLD64rri8, X86::SHLD64mri8 }, - { X86::SHR16r1, X86::SHR16m1 }, - { X86::SHR16rCL, X86::SHR16mCL }, - { X86::SHR16ri, X86::SHR16mi }, - { X86::SHR32r1, X86::SHR32m1 }, - { X86::SHR32rCL, X86::SHR32mCL }, - { X86::SHR32ri, X86::SHR32mi }, - { X86::SHR64r1, X86::SHR64m1 }, - { X86::SHR64rCL, X86::SHR64mCL }, - { X86::SHR64ri, X86::SHR64mi }, - { X86::SHR8r1, X86::SHR8m1 }, - { X86::SHR8rCL, X86::SHR8mCL }, - { X86::SHR8ri, X86::SHR8mi }, - { X86::SHRD16rrCL, X86::SHRD16mrCL }, - { X86::SHRD16rri8, X86::SHRD16mri8 }, - { X86::SHRD32rrCL, X86::SHRD32mrCL }, - { X86::SHRD32rri8, X86::SHRD32mri8 }, - { X86::SHRD64rrCL, X86::SHRD64mrCL }, - { X86::SHRD64rri8, X86::SHRD64mri8 }, - { X86::SUB16ri, X86::SUB16mi }, - { X86::SUB16ri8, X86::SUB16mi8 }, - { X86::SUB16rr, X86::SUB16mr }, - { X86::SUB32ri, X86::SUB32mi }, - { X86::SUB32ri8, X86::SUB32mi8 }, - { X86::SUB32rr, X86::SUB32mr }, - { X86::SUB64ri32, X86::SUB64mi32 }, - { X86::SUB64ri8, X86::SUB64mi8 }, - { X86::SUB64rr, X86::SUB64mr }, - { X86::SUB8ri, X86::SUB8mi }, - { X86::SUB8rr, X86::SUB8mr }, - { X86::XOR16ri, X86::XOR16mi }, - { X86::XOR16ri8, X86::XOR16mi8 }, - { X86::XOR16rr, X86::XOR16mr }, - { X86::XOR32ri, X86::XOR32mi }, - { X86::XOR32ri8, X86::XOR32mi8 }, - { X86::XOR32rr, X86::XOR32mr }, - { X86::XOR64ri32, X86::XOR64mi32 }, - { X86::XOR64ri8, X86::XOR64mi8 }, - { X86::XOR64rr, X86::XOR64mr }, - { X86::XOR8ri, X86::XOR8mi }, - { X86::XOR8rr, X86::XOR8mr 
} - }; - - for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { - unsigned RegOp = OpTbl2Addr[i][0]; - unsigned MemOp = OpTbl2Addr[i][1]; - if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, MemOp))) - assert(false && "Duplicated entries?"); - unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0,folded load and store - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo)))) - AmbEntries.push_back(MemOp); - } - - // If the third value is 1, then it's folding either a load or a store. - static const unsigned OpTbl0[][3] = { - { X86::CALL32r, X86::CALL32m, 1 }, - { X86::CALL64r, X86::CALL64m, 1 }, - { X86::CMP16ri, X86::CMP16mi, 1 }, - { X86::CMP16ri8, X86::CMP16mi8, 1 }, - { X86::CMP32ri, X86::CMP32mi, 1 }, - { X86::CMP32ri8, X86::CMP32mi8, 1 }, - { X86::CMP64ri32, X86::CMP64mi32, 1 }, - { X86::CMP64ri8, X86::CMP64mi8, 1 }, - { X86::CMP8ri, X86::CMP8mi, 1 }, - { X86::DIV16r, X86::DIV16m, 1 }, - { X86::DIV32r, X86::DIV32m, 1 }, - { X86::DIV64r, X86::DIV64m, 1 }, - { X86::DIV8r, X86::DIV8m, 1 }, - { X86::FsMOVAPDrr, X86::MOVSDmr, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSmr, 0 }, - { X86::IDIV16r, X86::IDIV16m, 1 }, - { X86::IDIV32r, X86::IDIV32m, 1 }, - { X86::IDIV64r, X86::IDIV64m, 1 }, - { X86::IDIV8r, X86::IDIV8m, 1 }, - { X86::IMUL16r, X86::IMUL16m, 1 }, - { X86::IMUL32r, X86::IMUL32m, 1 }, - { X86::IMUL64r, X86::IMUL64m, 1 }, - { X86::IMUL8r, X86::IMUL8m, 1 }, - { X86::JMP32r, X86::JMP32m, 1 }, - { X86::JMP64r, X86::JMP64m, 1 }, - { X86::MOV16ri, X86::MOV16mi, 0 }, - { X86::MOV16rr, X86::MOV16mr, 0 }, - { X86::MOV16to16_, X86::MOV16_mr, 0 }, - { X86::MOV32ri, X86::MOV32mi, 0 }, - { X86::MOV32rr, X86::MOV32mr, 0 }, - { X86::MOV32to32_, X86::MOV32_mr, 0 }, - { X86::MOV64ri32, X86::MOV64mi32, 0 }, - { X86::MOV64rr, X86::MOV64mr, 0 }, - { X86::MOV8ri, X86::MOV8mi, 0 }, - { X86::MOV8rr, X86::MOV8mr, 0 }, - { X86::MOVAPDrr, X86::MOVAPDmr, 0 }, - { X86::MOVAPSrr, X86::MOVAPSmr, 0 }, - { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 }, - { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 }, - { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 }, - { X86::MOVSDrr, X86::MOVSDmr, 0 }, - { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 }, - { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 }, - { X86::MOVSSrr, X86::MOVSSmr, 0 }, - { X86::MOVUPDrr, X86::MOVUPDmr, 0 }, - { X86::MOVUPSrr, X86::MOVUPSmr, 0 }, - { X86::MUL16r, X86::MUL16m, 1 }, - { X86::MUL32r, X86::MUL32m, 1 }, - { X86::MUL64r, X86::MUL64m, 1 }, - { X86::MUL8r, X86::MUL8m, 1 }, - { X86::SETAEr, X86::SETAEm, 0 }, - { X86::SETAr, X86::SETAm, 0 }, - { X86::SETBEr, X86::SETBEm, 0 }, - { X86::SETBr, X86::SETBm, 0 }, - { X86::SETEr, X86::SETEm, 0 }, - { X86::SETGEr, X86::SETGEm, 0 }, - { X86::SETGr, X86::SETGm, 0 }, - { X86::SETLEr, X86::SETLEm, 0 }, - { X86::SETLr, X86::SETLm, 0 }, - { X86::SETNEr, X86::SETNEm, 0 }, - { X86::SETNPr, X86::SETNPm, 0 }, - { X86::SETNSr, X86::SETNSm, 0 }, - { X86::SETPr, X86::SETPm, 0 }, - { X86::SETSr, X86::SETSm, 0 }, - { X86::TAILJMPr, X86::TAILJMPm, 1 }, - { X86::TEST16ri, X86::TEST16mi, 1 }, - { X86::TEST32ri, X86::TEST32mi, 1 }, - { X86::TEST64ri32, X86::TEST64mi32, 1 }, - { X86::TEST8ri, X86::TEST8mi, 1 }, - { X86::XCHG16rr, X86::XCHG16mr, 0 }, - { X86::XCHG32rr, X86::XCHG32mr, 0 }, - { X86::XCHG64rr, X86::XCHG64mr, 0 }, - { X86::XCHG8rr, X86::XCHG8mr, 0 } - }; - - for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { - unsigned RegOp = OpTbl0[i][0]; - unsigned MemOp = OpTbl0[i][1]; - if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, MemOp))) - assert(false && 
"Duplicated entries?"); - unsigned FoldedLoad = OpTbl0[i][2]; - // Index 0, folded load or store. - unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); - if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo)))) - AmbEntries.push_back(MemOp); - } - - static const unsigned OpTbl1[][2] = { - { X86::CMP16rr, X86::CMP16rm }, - { X86::CMP32rr, X86::CMP32rm }, - { X86::CMP64rr, X86::CMP64rm }, - { X86::CMP8rr, X86::CMP8rm }, - { X86::CVTSD2SSrr, X86::CVTSD2SSrm }, - { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm }, - { X86::CVTSI2SDrr, X86::CVTSI2SDrm }, - { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm }, - { X86::CVTSI2SSrr, X86::CVTSI2SSrm }, - { X86::CVTSS2SDrr, X86::CVTSS2SDrm }, - { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm }, - { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm }, - { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm }, - { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm }, - { X86::FsMOVAPDrr, X86::MOVSDrm }, - { X86::FsMOVAPSrr, X86::MOVSSrm }, - { X86::IMUL16rri, X86::IMUL16rmi }, - { X86::IMUL16rri8, X86::IMUL16rmi8 }, - { X86::IMUL32rri, X86::IMUL32rmi }, - { X86::IMUL32rri8, X86::IMUL32rmi8 }, - { X86::IMUL64rri32, X86::IMUL64rmi32 }, - { X86::IMUL64rri8, X86::IMUL64rmi8 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm }, - { X86::Int_COMISDrr, X86::Int_COMISDrm }, - { X86::Int_COMISSrr, X86::Int_COMISSrm }, - { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm }, - { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm }, - { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm }, - { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm }, - { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm }, - { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm }, - { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm }, - { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm }, - { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm }, - { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm }, - { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm }, - { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm }, - { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm }, - { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm }, - { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm }, - { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm }, - { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm }, - { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm }, - { X86::MOV16rr, X86::MOV16rm }, - { X86::MOV16to16_, X86::MOV16_rm }, - { X86::MOV32rr, X86::MOV32rm }, - { X86::MOV32to32_, X86::MOV32_rm }, - { X86::MOV64rr, X86::MOV64rm }, - { X86::MOV64toPQIrr, X86::MOVQI2PQIrm }, - { X86::MOV64toSDrr, X86::MOV64toSDrm }, - { X86::MOV8rr, X86::MOV8rm }, - { X86::MOVAPDrr, X86::MOVAPDrm }, - { X86::MOVAPSrr, X86::MOVAPSrm }, - { X86::MOVDDUPrr, X86::MOVDDUPrm }, - { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm }, - { X86::MOVDI2SSrr, X86::MOVDI2SSrm }, - { X86::MOVSD2PDrr, X86::MOVSD2PDrm }, - { X86::MOVSDrr, X86::MOVSDrm }, - { X86::MOVSHDUPrr, X86::MOVSHDUPrm }, - { X86::MOVSLDUPrr, X86::MOVSLDUPrm }, - { X86::MOVSS2PSrr, X86::MOVSS2PSrm }, - { X86::MOVSSrr, X86::MOVSSrm }, - { X86::MOVSX16rr8, X86::MOVSX16rm8 }, - { X86::MOVSX32rr16, X86::MOVSX32rm16 }, - { X86::MOVSX32rr8, X86::MOVSX32rm8 }, - { X86::MOVSX64rr16, X86::MOVSX64rm16 }, - { X86::MOVSX64rr32, X86::MOVSX64rm32 }, - { 
X86::MOVSX64rr8, X86::MOVSX64rm8 }, - { X86::MOVUPDrr, X86::MOVUPDrm }, - { X86::MOVUPSrr, X86::MOVUPSrm }, - { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm }, - { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm }, - { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm }, - { X86::MOVZX16rr8, X86::MOVZX16rm8 }, - { X86::MOVZX32rr16, X86::MOVZX32rm16 }, - { X86::MOVZX32rr8, X86::MOVZX32rm8 }, - { X86::MOVZX64rr16, X86::MOVZX64rm16 }, - { X86::MOVZX64rr8, X86::MOVZX64rm8 }, - { X86::PSHUFDri, X86::PSHUFDmi }, - { X86::PSHUFHWri, X86::PSHUFHWmi }, - { X86::PSHUFLWri, X86::PSHUFLWmi }, - { X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 }, - { X86::RCPPSr, X86::RCPPSm }, - { X86::RCPPSr_Int, X86::RCPPSm_Int }, - { X86::RSQRTPSr, X86::RSQRTPSm }, - { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int }, - { X86::RSQRTSSr, X86::RSQRTSSm }, - { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int }, - { X86::SQRTPDr, X86::SQRTPDm }, - { X86::SQRTPDr_Int, X86::SQRTPDm_Int }, - { X86::SQRTPSr, X86::SQRTPSm }, - { X86::SQRTPSr_Int, X86::SQRTPSm_Int }, - { X86::SQRTSDr, X86::SQRTSDm }, - { X86::SQRTSDr_Int, X86::SQRTSDm_Int }, - { X86::SQRTSSr, X86::SQRTSSm }, - { X86::SQRTSSr_Int, X86::SQRTSSm_Int }, - { X86::TEST16rr, X86::TEST16rm }, - { X86::TEST32rr, X86::TEST32rm }, - { X86::TEST64rr, X86::TEST64rm }, - { X86::TEST8rr, X86::TEST8rm }, - // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 - { X86::UCOMISDrr, X86::UCOMISDrm }, - { X86::UCOMISSrr, X86::UCOMISSrm }, - { X86::XCHG16rr, X86::XCHG16rm }, - { X86::XCHG32rr, X86::XCHG32rm }, - { X86::XCHG64rr, X86::XCHG64rm }, - { X86::XCHG8rr, X86::XCHG8rm } - }; - - for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { - unsigned RegOp = OpTbl1[i][0]; - unsigned MemOp = OpTbl1[i][1]; - if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, MemOp))) - assert(false && "Duplicated entries?"); - unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load - if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo)))) - AmbEntries.push_back(MemOp); - } - - static const unsigned OpTbl2[][2] = { - { X86::ADC32rr, X86::ADC32rm }, - { X86::ADC64rr, X86::ADC64rm }, - { X86::ADD16rr, X86::ADD16rm }, - { X86::ADD32rr, X86::ADD32rm }, - { X86::ADD64rr, X86::ADD64rm }, - { X86::ADD8rr, X86::ADD8rm }, - { X86::ADDPDrr, X86::ADDPDrm }, - { X86::ADDPSrr, X86::ADDPSrm }, - { X86::ADDSDrr, X86::ADDSDrm }, - { X86::ADDSSrr, X86::ADDSSrm }, - { X86::ADDSUBPDrr, X86::ADDSUBPDrm }, - { X86::ADDSUBPSrr, X86::ADDSUBPSrm }, - { X86::AND16rr, X86::AND16rm }, - { X86::AND32rr, X86::AND32rm }, - { X86::AND64rr, X86::AND64rm }, - { X86::AND8rr, X86::AND8rm }, - { X86::ANDNPDrr, X86::ANDNPDrm }, - { X86::ANDNPSrr, X86::ANDNPSrm }, - { X86::ANDPDrr, X86::ANDPDrm }, - { X86::ANDPSrr, X86::ANDPSrm }, - { X86::CMOVA16rr, X86::CMOVA16rm }, - { X86::CMOVA32rr, X86::CMOVA32rm }, - { X86::CMOVA64rr, X86::CMOVA64rm }, - { X86::CMOVAE16rr, X86::CMOVAE16rm }, - { X86::CMOVAE32rr, X86::CMOVAE32rm }, - { X86::CMOVAE64rr, X86::CMOVAE64rm }, - { X86::CMOVB16rr, X86::CMOVB16rm }, - { X86::CMOVB32rr, X86::CMOVB32rm }, - { X86::CMOVB64rr, X86::CMOVB64rm }, - { X86::CMOVBE16rr, X86::CMOVBE16rm }, - { X86::CMOVBE32rr, X86::CMOVBE32rm }, - { X86::CMOVBE64rr, X86::CMOVBE64rm }, - { X86::CMOVE16rr, X86::CMOVE16rm }, - { X86::CMOVE32rr, X86::CMOVE32rm }, - { X86::CMOVE64rr, X86::CMOVE64rm }, - { X86::CMOVG16rr, X86::CMOVG16rm }, - { X86::CMOVG32rr, X86::CMOVG32rm }, - { X86::CMOVG64rr, X86::CMOVG64rm }, - { X86::CMOVGE16rr, X86::CMOVGE16rm }, - { X86::CMOVGE32rr, 
X86::CMOVGE32rm }, - { X86::CMOVGE64rr, X86::CMOVGE64rm }, - { X86::CMOVL16rr, X86::CMOVL16rm }, - { X86::CMOVL32rr, X86::CMOVL32rm }, - { X86::CMOVL64rr, X86::CMOVL64rm }, - { X86::CMOVLE16rr, X86::CMOVLE16rm }, - { X86::CMOVLE32rr, X86::CMOVLE32rm }, - { X86::CMOVLE64rr, X86::CMOVLE64rm }, - { X86::CMOVNE16rr, X86::CMOVNE16rm }, - { X86::CMOVNE32rr, X86::CMOVNE32rm }, - { X86::CMOVNE64rr, X86::CMOVNE64rm }, - { X86::CMOVNP16rr, X86::CMOVNP16rm }, - { X86::CMOVNP32rr, X86::CMOVNP32rm }, - { X86::CMOVNP64rr, X86::CMOVNP64rm }, - { X86::CMOVNS16rr, X86::CMOVNS16rm }, - { X86::CMOVNS32rr, X86::CMOVNS32rm }, - { X86::CMOVNS64rr, X86::CMOVNS64rm }, - { X86::CMOVP16rr, X86::CMOVP16rm }, - { X86::CMOVP32rr, X86::CMOVP32rm }, - { X86::CMOVP64rr, X86::CMOVP64rm }, - { X86::CMOVS16rr, X86::CMOVS16rm }, - { X86::CMOVS32rr, X86::CMOVS32rm }, - { X86::CMOVS64rr, X86::CMOVS64rm }, - { X86::CMPPDrri, X86::CMPPDrmi }, - { X86::CMPPSrri, X86::CMPPSrmi }, - { X86::CMPSDrr, X86::CMPSDrm }, - { X86::CMPSSrr, X86::CMPSSrm }, - { X86::DIVPDrr, X86::DIVPDrm }, - { X86::DIVPSrr, X86::DIVPSrm }, - { X86::DIVSDrr, X86::DIVSDrm }, - { X86::DIVSSrr, X86::DIVSSrm }, - { X86::HADDPDrr, X86::HADDPDrm }, - { X86::HADDPSrr, X86::HADDPSrm }, - { X86::HSUBPDrr, X86::HSUBPDrm }, - { X86::HSUBPSrr, X86::HSUBPSrm }, - { X86::IMUL16rr, X86::IMUL16rm }, - { X86::IMUL32rr, X86::IMUL32rm }, - { X86::IMUL64rr, X86::IMUL64rm }, - { X86::MAXPDrr, X86::MAXPDrm }, - { X86::MAXPDrr_Int, X86::MAXPDrm_Int }, - { X86::MAXPSrr, X86::MAXPSrm }, - { X86::MAXPSrr_Int, X86::MAXPSrm_Int }, - { X86::MAXSDrr, X86::MAXSDrm }, - { X86::MAXSDrr_Int, X86::MAXSDrm_Int }, - { X86::MAXSSrr, X86::MAXSSrm }, - { X86::MAXSSrr_Int, X86::MAXSSrm_Int }, - { X86::MINPDrr, X86::MINPDrm }, - { X86::MINPDrr_Int, X86::MINPDrm_Int }, - { X86::MINPSrr, X86::MINPSrm }, - { X86::MINPSrr_Int, X86::MINPSrm_Int }, - { X86::MINSDrr, X86::MINSDrm }, - { X86::MINSDrr_Int, X86::MINSDrm_Int }, - { X86::MINSSrr, X86::MINSSrm }, - { X86::MINSSrr_Int, X86::MINSSrm_Int }, - { X86::MULPDrr, X86::MULPDrm }, - { X86::MULPSrr, X86::MULPSrm }, - { X86::MULSDrr, X86::MULSDrm }, - { X86::MULSSrr, X86::MULSSrm }, - { X86::OR16rr, X86::OR16rm }, - { X86::OR32rr, X86::OR32rm }, - { X86::OR64rr, X86::OR64rm }, - { X86::OR8rr, X86::OR8rm }, - { X86::ORPDrr, X86::ORPDrm }, - { X86::ORPSrr, X86::ORPSrm }, - { X86::PACKSSDWrr, X86::PACKSSDWrm }, - { X86::PACKSSWBrr, X86::PACKSSWBrm }, - { X86::PACKUSWBrr, X86::PACKUSWBrm }, - { X86::PADDBrr, X86::PADDBrm }, - { X86::PADDDrr, X86::PADDDrm }, - { X86::PADDQrr, X86::PADDQrm }, - { X86::PADDSBrr, X86::PADDSBrm }, - { X86::PADDSWrr, X86::PADDSWrm }, - { X86::PADDWrr, X86::PADDWrm }, - { X86::PANDNrr, X86::PANDNrm }, - { X86::PANDrr, X86::PANDrm }, - { X86::PAVGBrr, X86::PAVGBrm }, - { X86::PAVGWrr, X86::PAVGWrm }, - { X86::PCMPEQBrr, X86::PCMPEQBrm }, - { X86::PCMPEQDrr, X86::PCMPEQDrm }, - { X86::PCMPEQWrr, X86::PCMPEQWrm }, - { X86::PCMPGTBrr, X86::PCMPGTBrm }, - { X86::PCMPGTDrr, X86::PCMPGTDrm }, - { X86::PCMPGTWrr, X86::PCMPGTWrm }, - { X86::PINSRWrri, X86::PINSRWrmi }, - { X86::PMADDWDrr, X86::PMADDWDrm }, - { X86::PMAXSWrr, X86::PMAXSWrm }, - { X86::PMAXUBrr, X86::PMAXUBrm }, - { X86::PMINSWrr, X86::PMINSWrm }, - { X86::PMINUBrr, X86::PMINUBrm }, - { X86::PMULHUWrr, X86::PMULHUWrm }, - { X86::PMULHWrr, X86::PMULHWrm }, - { X86::PMULLWrr, X86::PMULLWrm }, - { X86::PMULUDQrr, X86::PMULUDQrm }, - { X86::PORrr, X86::PORrm }, - { X86::PSADBWrr, X86::PSADBWrm }, - { X86::PSLLDrr, X86::PSLLDrm }, - { X86::PSLLQrr, X86::PSLLQrm }, - { X86::PSLLWrr, 
X86::PSLLWrm }, - { X86::PSRADrr, X86::PSRADrm }, - { X86::PSRAWrr, X86::PSRAWrm }, - { X86::PSRLDrr, X86::PSRLDrm }, - { X86::PSRLQrr, X86::PSRLQrm }, - { X86::PSRLWrr, X86::PSRLWrm }, - { X86::PSUBBrr, X86::PSUBBrm }, - { X86::PSUBDrr, X86::PSUBDrm }, - { X86::PSUBSBrr, X86::PSUBSBrm }, - { X86::PSUBSWrr, X86::PSUBSWrm }, - { X86::PSUBWrr, X86::PSUBWrm }, - { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm }, - { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm }, - { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm }, - { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm }, - { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm }, - { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm }, - { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm }, - { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm }, - { X86::PXORrr, X86::PXORrm }, - { X86::SBB32rr, X86::SBB32rm }, - { X86::SBB64rr, X86::SBB64rm }, - { X86::SHUFPDrri, X86::SHUFPDrmi }, - { X86::SHUFPSrri, X86::SHUFPSrmi }, - { X86::SUB16rr, X86::SUB16rm }, - { X86::SUB32rr, X86::SUB32rm }, - { X86::SUB64rr, X86::SUB64rm }, - { X86::SUB8rr, X86::SUB8rm }, - { X86::SUBPDrr, X86::SUBPDrm }, - { X86::SUBPSrr, X86::SUBPSrm }, - { X86::SUBSDrr, X86::SUBSDrm }, - { X86::SUBSSrr, X86::SUBSSrm }, - // FIXME: TEST*rr -> swapped operand of TEST*mr. - { X86::UNPCKHPDrr, X86::UNPCKHPDrm }, - { X86::UNPCKHPSrr, X86::UNPCKHPSrm }, - { X86::UNPCKLPDrr, X86::UNPCKLPDrm }, - { X86::UNPCKLPSrr, X86::UNPCKLPSrm }, - { X86::XOR16rr, X86::XOR16rm }, - { X86::XOR32rr, X86::XOR32rm }, - { X86::XOR64rr, X86::XOR64rm }, - { X86::XOR8rr, X86::XOR8rm }, - { X86::XORPDrr, X86::XORPDrm }, - { X86::XORPSrr, X86::XORPSrm } - }; - - for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { - unsigned RegOp = OpTbl2[i][0]; - unsigned MemOp = OpTbl2[i][1]; - if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, MemOp))) - assert(false && "Duplicated entries?"); - unsigned AuxInfo = 2 | (1 << 4); // Index 1, folded load - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo)))) - AmbEntries.push_back(MemOp); - } - - // Remove ambiguous entries. - assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?"); } // getDwarfRegNum - This function maps LLVM register identifiers to the @@ -841,474 +239,6 @@ void X86RegisterInfo::reMaterialize(MachineBasicBlock &MBB, } } -static MachineInstr *FuseTwoAddrInst(unsigned Opcode, - SmallVector &MOs, - MachineInstr *MI, const TargetInstrInfo &TII) { - // Create the base instruction with the memory operand as the first part. - MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true); - MachineInstrBuilder MIB(NewMI); - unsigned NumAddrOps = MOs.size(); - for (unsigned i = 0; i != NumAddrOps; ++i) - MIB = X86InstrAddOperand(MIB, MOs[i]); - if (NumAddrOps < 4) // FrameIndex only - MIB.addImm(1).addReg(0).addImm(0); - - // Loop over the rest of the ri operands, converting them over. 
@@ -841,474 +239,6 @@ void X86RegisterInfo::reMaterialize(MachineBasicBlock &MBB,
   }
 }
 
-static MachineInstr *FuseTwoAddrInst(unsigned Opcode,
-                                     SmallVector<MachineOperand,4> &MOs,
-                                     MachineInstr *MI, const TargetInstrInfo &TII) {
-  // Create the base instruction with the memory operand as the first part.
-  MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true);
-  MachineInstrBuilder MIB(NewMI);
-  unsigned NumAddrOps = MOs.size();
-  for (unsigned i = 0; i != NumAddrOps; ++i)
-    MIB = X86InstrAddOperand(MIB, MOs[i]);
-  if (NumAddrOps < 4)  // FrameIndex only
-    MIB.addImm(1).addReg(0).addImm(0);
-
-  // Loop over the rest of the ri operands, converting them over.
-  unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2;
-  for (unsigned i = 0; i != NumOps; ++i) {
-    MachineOperand &MO = MI->getOperand(i+2);
-    MIB = X86InstrAddOperand(MIB, MO);
-  }
-  for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
-    MIB = X86InstrAddOperand(MIB, MO);
-  }
-  return MIB;
-}
-
-static MachineInstr *FuseInst(unsigned Opcode, unsigned OpNo,
-                              SmallVector<MachineOperand,4> &MOs,
-                              MachineInstr *MI, const TargetInstrInfo &TII) {
-  MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true);
-  MachineInstrBuilder MIB(NewMI);
-
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
-    if (i == OpNo) {
-      assert(MO.isRegister() && "Expected to fold into reg operand!");
-      unsigned NumAddrOps = MOs.size();
-      for (unsigned i = 0; i != NumAddrOps; ++i)
-        MIB = X86InstrAddOperand(MIB, MOs[i]);
-      if (NumAddrOps < 4)  // FrameIndex only
-        MIB.addImm(1).addReg(0).addImm(0);
-    } else {
-      MIB = X86InstrAddOperand(MIB, MO);
-    }
-  }
-  return MIB;
-}
-
-static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
-                                SmallVector<MachineOperand,4> &MOs,
-                                MachineInstr *MI) {
-  MachineInstrBuilder MIB = BuildMI(TII.get(Opcode));
-
-  unsigned NumAddrOps = MOs.size();
-  for (unsigned i = 0; i != NumAddrOps; ++i)
-    MIB = X86InstrAddOperand(MIB, MOs[i]);
-  if (NumAddrOps < 4)  // FrameIndex only
-    MIB.addImm(1).addReg(0).addImm(0);
-  return MIB.addImm(0);
-}
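
// Illustration (not part of the patch): the three fusing helpers above all
// share one convention -- an x86 memory reference at this point is four
// machine operands (base register, scale, index register, displacement).
// When the caller passes a lone FrameIndex operand instead, the missing
// three are filled in as scale 1, no index register, displacement 0, which
// is what MIB.addImm(1).addReg(0).addImm(0) appends.  A hypothetical helper
// naming the steps (assumes the headers already included by this file):

static void addFullAddress(MachineInstrBuilder &MIB,
                           SmallVector<MachineOperand,4> &MOs) {
  for (unsigned i = 0, e = MOs.size(); i != e; ++i)
    MIB = X86InstrAddOperand(MIB, MOs[i]);
  if (MOs.size() < 4)  // a bare FrameIndex stands in for the base
    MIB.addImm(1)      // scale = 1
       .addReg(0)      // no index register
       .addImm(0);     // displacement = 0
}
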
"r" : "s") << "): " << *MI; - return NULL; -} - - -MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, - SmallVectorImpl &Ops, - int FrameIndex) const { - // Check switch flag - if (NoFusing) return NULL; - - if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { - unsigned NewOpc = 0; - switch (MI->getOpcode()) { - default: return NULL; - case X86::TEST8rr: NewOpc = X86::CMP8ri; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; - } - // Change to CMPXXri r, 0 first. - MI->setInstrDescriptor(TII.get(NewOpc)); - MI->getOperand(1).ChangeToImmediate(0); - } else if (Ops.size() != 1) - return NULL; - - SmallVector MOs; - MOs.push_back(MachineOperand::CreateFI(FrameIndex)); - return foldMemoryOperand(MI, Ops[0], MOs); -} - -MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, - SmallVectorImpl &Ops, - MachineInstr *LoadMI) const { - // Check switch flag - if (NoFusing) return NULL; - - if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { - unsigned NewOpc = 0; - switch (MI->getOpcode()) { - default: return NULL; - case X86::TEST8rr: NewOpc = X86::CMP8ri; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; - } - // Change to CMPXXri r, 0 first. - MI->setInstrDescriptor(TII.get(NewOpc)); - MI->getOperand(1).ChangeToImmediate(0); - } else if (Ops.size() != 1) - return NULL; - - SmallVector MOs; - unsigned NumOps = TII.getNumOperands(LoadMI->getOpcode()); - for (unsigned i = NumOps - 4; i != NumOps; ++i) - MOs.push_back(LoadMI->getOperand(i)); - return foldMemoryOperand(MI, Ops[0], MOs); -} - - -bool X86RegisterInfo::canFoldMemoryOperand(MachineInstr *MI, - SmallVectorImpl &Ops) const { - // Check switch flag - if (NoFusing) return 0; - - if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { - switch (MI->getOpcode()) { - default: return false; - case X86::TEST8rr: - case X86::TEST16rr: - case X86::TEST32rr: - case X86::TEST64rr: - return true; - } - } - - if (Ops.size() != 1) - return false; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - unsigned NumOps = TII.getNumOperands(Opc); - bool isTwoAddr = NumOps > 1 && - TII.getOperandConstraint(Opc, 1, TOI::TIED_TO) != -1; - - // Folding a memory location into the two-address part of a two-address - // instruction is different than folding it other places. It requires - // replacing the *two* registers with the memory location. 
-
-
-bool X86RegisterInfo::canFoldMemoryOperand(MachineInstr *MI,
-                                           SmallVectorImpl<unsigned> &Ops) const {
-  // Check switch flag
-  if (NoFusing) return 0;
-
-  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
-    switch (MI->getOpcode()) {
-    default: return false;
-    case X86::TEST8rr:
-    case X86::TEST16rr:
-    case X86::TEST32rr:
-    case X86::TEST64rr:
-      return true;
-    }
-  }
-
-  if (Ops.size() != 1)
-    return false;
-
-  unsigned OpNum = Ops[0];
-  unsigned Opc = MI->getOpcode();
-  unsigned NumOps = TII.getNumOperands(Opc);
-  bool isTwoAddr = NumOps > 1 &&
-    TII.getOperandConstraint(Opc, 1, TOI::TIED_TO) != -1;
-
-  // Folding a memory location into the two-address part of a two-address
-  // instruction is different than folding it other places.  It requires
-  // replacing the *two* registers with the memory location.
-  const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
-  if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
-    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
-  } else if (OpNum == 0) { // If operand 0
-    switch (Opc) {
-    case X86::MOV16r0:
-    case X86::MOV32r0:
-    case X86::MOV64r0:
-    case X86::MOV8r0:
-      return true;
-    default: break;
-    }
-    OpcodeTablePtr = &RegOp2MemOpTable0;
-  } else if (OpNum == 1) {
-    OpcodeTablePtr = &RegOp2MemOpTable1;
-  } else if (OpNum == 2) {
-    OpcodeTablePtr = &RegOp2MemOpTable2;
-  }
-
-  if (OpcodeTablePtr) {
-    // Find the Opcode to fuse
-    DenseMap<unsigned*, unsigned>::iterator I =
-      OpcodeTablePtr->find((unsigned*)Opc);
-    if (I != OpcodeTablePtr->end())
-      return true;
-  }
-  return false;
-}
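
// Illustration (not part of the patch): a client such as the spiller is
// expected to probe canFoldMemoryOperand() before rewriting and to splice
// the result in itself.  A hedged sketch of that calling convention, where
// MII is an iterator at the instruction being spilled, OpNum the operand
// to fold, and FI the spill slot's frame index (all names hypothetical):

SmallVector<unsigned, 2> Ops;
Ops.push_back(OpNum);
if (RegInfo->canFoldMemoryOperand(&*MII, Ops))
  if (MachineInstr *FoldedMI = RegInfo->foldMemoryOperand(&*MII, Ops, FI)) {
    // The client owns the rewrite: insert the fused instruction and
    // remove the original from the block.
    MBB.insert(MII, FoldedMI);
    MBB.erase(MII);
  }
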
-
-bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-                                unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
-                                SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
-    MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
-  if (I == MemOp2RegOpTable.end())
-    return false;
-  unsigned Opc = I->second.first;
-  unsigned Index = I->second.second & 0xf;
-  bool FoldedLoad = I->second.second & (1 << 4);
-  bool FoldedStore = I->second.second & (1 << 5);
-  if (UnfoldLoad && !FoldedLoad)
-    return false;
-  UnfoldLoad &= FoldedLoad;
-  if (UnfoldStore && !FoldedStore)
-    return false;
-  UnfoldStore &= FoldedStore;
-
-  const TargetInstrDescriptor &TID = TII.get(Opc);
-  const TargetOperandInfo &TOI = TID.OpInfo[Index];
-  const TargetRegisterClass *RC = (TOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
-    ? TII.getPointerRegClass() : getRegClass(TOI.RegClass);
-  SmallVector<MachineOperand,4> AddrOps;
-  SmallVector<MachineOperand,2> BeforeOps;
-  SmallVector<MachineOperand,2> AfterOps;
-  SmallVector<MachineOperand,4> ImpOps;
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &Op = MI->getOperand(i);
-    if (i >= Index && i < Index+4)
-      AddrOps.push_back(Op);
-    else if (Op.isRegister() && Op.isImplicit())
-      ImpOps.push_back(Op);
-    else if (i < Index)
-      BeforeOps.push_back(Op);
-    else if (i > Index)
-      AfterOps.push_back(Op);
-  }
-
-  // Emit the load instruction.
-  if (UnfoldLoad) {
-    TII.loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
-    if (UnfoldStore) {
-      // Address operands cannot be marked isKill.
-      for (unsigned i = 1; i != 5; ++i) {
-        MachineOperand &MO = NewMIs[0]->getOperand(i);
-        if (MO.isRegister())
-          MO.setIsKill(false);
-      }
-    }
-  }
-
-  // Emit the data processing instruction.
-  MachineInstr *DataMI = new MachineInstr(TID, true);
-  MachineInstrBuilder MIB(DataMI);
-
-  if (FoldedStore)
-    MIB.addReg(Reg, true);
-  for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
-    MIB = X86InstrAddOperand(MIB, BeforeOps[i]);
-  if (FoldedLoad)
-    MIB.addReg(Reg);
-  for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
-    MIB = X86InstrAddOperand(MIB, AfterOps[i]);
-  for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
-    MachineOperand &MO = ImpOps[i];
-    MIB.addReg(MO.getReg(), MO.isDef(), true, MO.isKill(), MO.isDead());
-  }
-  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
-  unsigned NewOpc = 0;
-  switch (DataMI->getOpcode()) {
-  default: break;
-  case X86::CMP64ri32:
-  case X86::CMP32ri:
-  case X86::CMP16ri:
-  case X86::CMP8ri: {
-    MachineOperand &MO0 = DataMI->getOperand(0);
-    MachineOperand &MO1 = DataMI->getOperand(1);
-    if (MO1.getImm() == 0) {
-      switch (DataMI->getOpcode()) {
-      default: break;
-      case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
-      case X86::CMP32ri:   NewOpc = X86::TEST32rr; break;
-      case X86::CMP16ri:   NewOpc = X86::TEST16rr; break;
-      case X86::CMP8ri:    NewOpc = X86::TEST8rr; break;
-      }
-      DataMI->setInstrDescriptor(TII.get(NewOpc));
-      MO1.ChangeToRegister(MO0.getReg(), false);
-    }
-  }
-  }
-  NewMIs.push_back(DataMI);
-
-  // Emit the store instruction.
-  if (UnfoldStore) {
-    const TargetOperandInfo &DstTOI = TID.OpInfo[0];
-    const TargetRegisterClass *DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
-      ? TII.getPointerRegClass() : getRegClass(DstTOI.RegClass);
-    TII.storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs);
-  }
-
-  return true;
-}
-
-static unsigned getLoadRegOpcode(const TargetRegisterClass *RC,
-                                 unsigned StackAlign) {
-  unsigned Opc = 0;
-  if (RC == &X86::GR64RegClass) {
-    Opc = X86::MOV64rm;
-  } else if (RC == &X86::GR32RegClass) {
-    Opc = X86::MOV32rm;
-  } else if (RC == &X86::GR16RegClass) {
-    Opc = X86::MOV16rm;
-  } else if (RC == &X86::GR8RegClass) {
-    Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR32_RegClass) {
-    Opc = X86::MOV32_rm;
-  } else if (RC == &X86::GR16_RegClass) {
-    Opc = X86::MOV16_rm;
-  } else if (RC == &X86::RFP80RegClass) {
-    Opc = X86::LD_Fp80m;
-  } else if (RC == &X86::RFP64RegClass) {
-    Opc = X86::LD_Fp64m;
-  } else if (RC == &X86::RFP32RegClass) {
-    Opc = X86::LD_Fp32m;
-  } else if (RC == &X86::FR32RegClass) {
-    Opc = X86::MOVSSrm;
-  } else if (RC == &X86::FR64RegClass) {
-    Opc = X86::MOVSDrm;
-  } else if (RC == &X86::VR128RegClass) {
-    // FIXME: Use movaps once we are capable of selectively
-    // aligning functions that spill SSE registers on 16-byte boundaries.
-    Opc = StackAlign >= 16 ? X86::MOVAPSrm : X86::MOVUPSrm;
-  } else if (RC == &X86::VR64RegClass) {
-    Opc = X86::MMX_MOVQ64rm;
-  } else {
-    assert(0 && "Unknown regclass");
-    abort();
-  }
-
-  return Opc;
-}
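
// Illustration (not part of the patch): unfolding walks the AuxInfo
// encoding in reverse.  For a reg-memory instruction whose table entry
// records a folded load -- say ADD32rm, which the tables map back to
// ADD32rr -- the rewrite becomes a re-materialized load into a scratch
// register followed by the register form (schematic, register names
// hypothetical):
//
//   ADD32rm %dst, [base + scale*index + disp]
//   =>
//   MOV32rm %scratch, [base + scale*index + disp]  ; loadRegFromAddr
//   ADD32rr %dst, %scratch                         ; the DataMI built above
//
// When the entry also records a folded store (bit 5), storeRegToAddr
// appends a third instruction writing the result back to the same address.
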
-
-bool
-X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-                                     SmallVectorImpl<SDNode*> &NewNodes) const {
-  if (!N->isTargetOpcode())
-    return false;
-
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
-    MemOp2RegOpTable.find((unsigned*)N->getTargetOpcode());
-  if (I == MemOp2RegOpTable.end())
-    return false;
-  unsigned Opc = I->second.first;
-  unsigned Index = I->second.second & 0xf;
-  bool FoldedLoad = I->second.second & (1 << 4);
-  bool FoldedStore = I->second.second & (1 << 5);
-  const TargetInstrDescriptor &TID = TII.get(Opc);
-  const TargetOperandInfo &TOI = TID.OpInfo[Index];
-  const TargetRegisterClass *RC = (TOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
-    ? TII.getPointerRegClass() : getRegClass(TOI.RegClass);
-  std::vector<SDOperand> AddrOps;
-  std::vector<SDOperand> BeforeOps;
-  std::vector<SDOperand> AfterOps;
-  unsigned NumOps = N->getNumOperands();
-  for (unsigned i = 0; i != NumOps-1; ++i) {
-    SDOperand Op = N->getOperand(i);
-    if (i >= Index && i < Index+4)
-      AddrOps.push_back(Op);
-    else if (i < Index)
-      BeforeOps.push_back(Op);
-    else if (i > Index)
-      AfterOps.push_back(Op);
-  }
-  SDOperand Chain = N->getOperand(NumOps-1);
-  AddrOps.push_back(Chain);
-
-  // Emit the load instruction.
-  SDNode *Load = 0;
-  if (FoldedLoad) {
-    MVT::ValueType VT = *RC->vt_begin();
-    Load = DAG.getTargetNode(getLoadRegOpcode(RC, StackAlign), VT, MVT::Other,
-                             &AddrOps[0], AddrOps.size());
-    NewNodes.push_back(Load);
-  }
-
-  // Emit the data processing instruction.
-  std::vector<MVT::ValueType> VTs;
-  const TargetRegisterClass *DstRC = 0;
-  if (TID.numDefs > 0) {
-    const TargetOperandInfo &DstTOI = TID.OpInfo[0];
-    DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
-      ? TII.getPointerRegClass() : getRegClass(DstTOI.RegClass);
-    VTs.push_back(*DstRC->vt_begin());
-  }
-  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
-    MVT::ValueType VT = N->getValueType(i);
-    if (VT != MVT::Other && i >= TID.numDefs)
-      VTs.push_back(VT);
-  }
-  if (Load)
-    BeforeOps.push_back(SDOperand(Load, 0));
-  std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
-  SDNode *NewNode = DAG.getTargetNode(Opc, VTs, &BeforeOps[0], BeforeOps.size());
-  NewNodes.push_back(NewNode);
-
-  // Emit the store instruction.
-  if (FoldedStore) {
-    AddrOps.pop_back();
-    AddrOps.push_back(SDOperand(NewNode, 0));
-    AddrOps.push_back(Chain);
-    SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(DstRC, StackAlign),
-                                      MVT::Other, &AddrOps[0], AddrOps.size());
-    NewNodes.push_back(Store);
-  }
-
-  return true;
-}
-
-unsigned X86RegisterInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
-                                      bool UnfoldLoad, bool UnfoldStore) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
-    MemOp2RegOpTable.find((unsigned*)Opc);
-  if (I == MemOp2RegOpTable.end())
-    return 0;
-  bool FoldedLoad = I->second.second & (1 << 4);
-  bool FoldedStore = I->second.second & (1 << 5);
-  if (UnfoldLoad && !FoldedLoad)
-    return 0;
-  if (UnfoldStore && !FoldedStore)
-    return 0;
-  return I->second.first;
-}
-
 const unsigned *
 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   static const unsigned CalleeSavedRegs32Bit[] = {
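
// Illustration (not part of the patch): getOpcodeAfterMemoryUnfold() lets a
// client ask what the unfolded instruction would become without building
// it, e.g. to inspect the register form's properties before committing to
// the rewrite.  A hedged sketch (RegInfo, TII, and MI assumed in scope):

unsigned RegOpc =
  RegInfo->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                      true  /*UnfoldLoad*/,
                                      false /*UnfoldStore*/);
if (RegOpc) {
  // Zero would mean no unfolding is recorded for this opcode / flag
  // combination; otherwise the register form's descriptor can be examined
  // before deciding to unfold.
  const TargetInstrDescriptor &TID = TII.get(RegOpc);
  (void)TID;
}
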
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index de348d7..d78311a 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -66,18 +66,6 @@ private:
   ///
   unsigned FramePtr;
 
-  /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
-  /// RegOp2MemOpTable2 - Load / store folding opcode maps.
-  ///
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr;
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable0;
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable1;
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2;
-
-  /// MemOp2RegOpTable - Load / store unfolding opcode map.
-  ///
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> > MemOp2RegOpTable;
-
 public:
   X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
 
@@ -99,44 +87,6 @@ public:
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      unsigned DestReg, const MachineInstr *Orig) const;
 
-  /// foldMemoryOperand - If this target supports it, fold a load or store of
-  /// the specified stack slot into the specified machine instruction for the
-  /// specified operand(s).  If this is possible, the target should perform the
-  /// folding and return true, otherwise it should return false.  If it folds
-  /// the instruction, it is likely that the MachineInstruction the iterator
-  /// references has been changed.
-  MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                  SmallVectorImpl<unsigned> &Ops,
-                                  int FrameIndex) const;
-
-  /// foldMemoryOperand - Same as the previous version except it allows folding
-  /// of any load and store from / to any address, not just from a specific
-  /// stack slot.
-  MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                  SmallVectorImpl<unsigned> &Ops,
-                                  MachineInstr* LoadMI) const;
-
-  /// canFoldMemoryOperand - Returns true if the specified load / store is
-  /// folding is possible.
-  bool canFoldMemoryOperand(MachineInstr*, SmallVectorImpl<unsigned> &) const;
-
-  /// unfoldMemoryOperand - Separate a single instruction which folded a load or
-  /// a store or a load and a store into two or more instruction. If this is
-  /// possible, returns true as well as the new instructions by reference.
-  bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-                           unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
-                           SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
-  bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-                           SmallVectorImpl<SDNode*> &NewNodes) const;
-
-  /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new
-  /// instruction after load / store are unfolded from an instruction of the
-  /// specified opcode. It returns zero if the specified unfolding is not
-  /// possible.
-  unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
-                                      bool UnfoldLoad, bool UnfoldStore) const;
-
   /// getCalleeSavedRegs - Return a null-terminated list of all of the
   /// callee-save registers on this target.
   const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
@@ -177,11 +127,6 @@ public:
   // Exception handling queries.
   unsigned getEHExceptionRegister() const;
   unsigned getEHHandlerRegister() const;
-
-private:
-  MachineInstr* foldMemoryOperand(MachineInstr* MI,
-                                  unsigned OpNum,
-                                  SmallVector<MachineOperand,4> &MOs) const;
 };
 
 // getX86SubSuperRegister - X86 utility function. It returns the sub or super
--
cgit v1.1