author    Evan Cheng <evan.cheng@apple.com>  2007-10-19 21:23:22 +0000
committer Evan Cheng <evan.cheng@apple.com>  2007-10-19 21:23:22 +0000
commit    66f716354527c5ab4687a89a1605915e5128a106 (patch)
tree      56c81a5bb8979cc3df924078dcbfb328a5d9ee64
parent    80629c85f1041df41b5158ebb03a4725af6ecd90 (diff)
Local spiller optimization:
Turn a store folding instruction into a load folding instruction. e.g.
     xorl  %edi, %eax
     movl  %eax, -32(%ebp)
     movl  -36(%ebp), %eax
     orl   %eax, -32(%ebp)
=>
     xorl  %edi, %eax
     orl   -36(%ebp), %eax
     mov   %eax, -32(%ebp)
This enables the unfolding optimization for a subsequent instruction which
will also eliminate the newly introduced store instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43192 91177308-0d34-0410-b5e6-96231b3b80d8
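The transform in brief, as PrepForUnfoldOpti below implements it: when an instruction folds a *store* of a spilled value, ask the target for a store-unfolded form, then re-fold the *load* of the other spilled operand instead. A sketch against the hooks this patch adds and uses (not standalone code; UnfoldVR, VirtReg and SS are the locals of PrepForUnfoldOpti):

    // MI is a store-folding instruction, e.g. "orl %eax, -32(%ebp)".
    // Ask the target whether the store half can be unfolded.
    unsigned UnfoldedOpc =
      MRI->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
                                      false /*UnfoldLoad*/, true /*UnfoldStore*/);
    if (UnfoldedOpc) {
      SmallVector<MachineInstr*, 4> NewMIs;
      // "orl %eax, -32(%ebp)"  ->  "orl %eax, %reg"  (plus a store, conceptually)
      if (MRI->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
        MachineInstr *NewMI = NewMIs.back();
        // Now fold the *load* of the other spilled operand instead:
        // "orl %eax, %reg"  ->  "orl -36(%ebp), %eax".
        unsigned Idx = NewMI->findRegisterUseOperandIdx(VirtReg);
        if (MachineInstr *FoldedMI = MRI->foldMemoryOperand(NewMI, Idx, SS)) {
          // Replace MI with FoldedMI; the explicit store that follows becomes
          // a dead-store-elimination candidate once the next instruction is
          // unfolded.
        }
      }
    }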
-rw-r--r--  include/llvm/Target/MRegisterInfo.h   13
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp            382
-rw-r--r--  lib/Target/X86/X86InstrInfo.td         12
-rw-r--r--  lib/Target/X86/X86InstrX86-64.td       17
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp    113
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h       11
6 files changed, 368 insertions(+), 180 deletions(-)
diff --git a/include/llvm/Target/MRegisterInfo.h b/include/llvm/Target/MRegisterInfo.h
index dd24f37..afcadc4 100644
--- a/include/llvm/Target/MRegisterInfo.h
+++ b/include/llvm/Target/MRegisterInfo.h
@@ -563,6 +563,14 @@ public:
return 0;
}
+ /// getOpcodeAfterMemoryFold - Returns the opcode of the would-be new
+ /// instruction after a load / store is folded into an instruction of the
+ /// specified opcode. It returns zero if the specified folding is not
+ /// possible.
+ virtual unsigned getOpcodeAfterMemoryFold(unsigned Opc, unsigned OpNum) const {
+ return 0;
+ }
+
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instructions. If this is
/// possible, returns true as well as the new instructions by reference.
@@ -578,8 +586,9 @@ public:
}
/// getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new
- /// instruction after load / store are unfolded from the specified opcode.
- /// It returns zero if the specified unfolding is impossible.
+ /// instruction after load / store are unfolded from an instruction of the
+ /// specified opcode. It returns zero if the specified unfolding is not
+ /// possible.
virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore) const {
return 0;
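A minimal, self-contained sketch of the new getOpcodeAfterMemoryFold contract; the opcode values here are hypothetical stand-ins, and a real target answers from its fold tables, as the X86 implementation later in this patch does:

    #include <cstdio>

    // Hypothetical opcode values for illustration only.
    enum { OR32rr = 1, OR32rm = 2 };

    // Toy override: map (register-form opcode, operand index) to the opcode
    // produced by folding a memory operand at that index, or 0 if impossible.
    unsigned getOpcodeAfterMemoryFold(unsigned Opc, unsigned OpNum) {
      if (Opc == OR32rr && OpNum == 2)   // fold the second source operand
        return OR32rm;
      return 0;                          // base-class default: no fold known
    }

    int main() {
      std::printf("%u\n", getOpcodeAfterMemoryFold(OR32rr, 2)); // 2 (OR32rm)
      std::printf("%u\n", getOpcodeAfterMemoryFold(OR32rr, 0)); // 0 (no fold)
    }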
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index d6dc92a..4f3a963 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -242,6 +242,8 @@ bool SimpleSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
//===----------------------------------------------------------------------===//
namespace {
+ class AvailableSpills;
+
/// LocalSpiller - This spiller does a simple pass over the machine basic
/// block to attempt to keep spills in registers as much as possible for
/// blocks that have low register pressure (the vreg may be spilled due to
@@ -270,6 +272,12 @@ namespace {
return true;
}
private:
+ bool PrepForUnfoldOpti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM);
void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM);
};
}
@@ -357,7 +365,7 @@ public:
void disallowClobberPhysReg(unsigned PhysReg);
/// ClobberPhysReg - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff we thing lives in
+ /// value. We use this to invalidate any info about stuff that lives in
/// it and any of its aliases.
void ClobberPhysReg(unsigned PhysReg);
@@ -450,7 +458,7 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
/// marked kill, then invalidate the information.
static void InvalidateKills(MachineInstr &MI, BitVector &RegKills,
std::vector<MachineOperand*> &KillOps,
- SmallVector<unsigned, 1> *KillRegs = NULL) {
+ SmallVector<unsigned, 2> *KillRegs = NULL) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isRegister() || !MO.isUse() || !MO.isKill())
@@ -723,6 +731,112 @@ namespace {
};
}
+/// PrepForUnfoldOpti - Turn a store folding instruction into a load folding
+/// instruction. e.g.
+/// xorl %edi, %eax
+/// movl %eax, -32(%ebp)
+/// movl -36(%ebp), %eax
+/// orl %eax, -32(%ebp)
+/// ==>
+/// xorl %edi, %eax
+/// orl -36(%ebp), %eax
+/// mov %eax, -32(%ebp)
+/// This enables the unfolding optimization for a subsequent instruction which will
+/// also eliminate the newly introduced store instruction.
+bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr &MI = *MII;
+ unsigned UnfoldedOpc = 0;
+ unsigned UnfoldPR = 0;
+ unsigned UnfoldVR = 0;
+ int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+ // Only transform a MI that folds a single register.
+ if (UnfoldedOpc)
+ return false;
+ UnfoldVR = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ if (VRM.isAssignedReg(UnfoldVR))
+ continue;
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ FoldedSS = VRM.getStackSlot(UnfoldVR);
+ MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+ if (DeadStore && (MR & VirtRegMap::isModRef)) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+ if (!PhysReg ||
+ DeadStore->findRegisterUseOperandIdx(PhysReg, true) == -1)
+ continue;
+ UnfoldPR = PhysReg;
+ UnfoldedOpc = MRI->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ false, true);
+ }
+ }
+
+ if (!UnfoldedOpc)
+ return false;
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isRegister() || MO.getReg() == 0 || !MO.isUse())
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (MRegisterInfo::isPhysicalRegister(VirtReg) ||
+ RegMap->isSubRegister(VirtReg))
+ continue;
+ if (VRM.isAssignedReg(VirtReg)) {
+ unsigned PhysReg = VRM.getPhys(VirtReg);
+ if (PhysReg && MRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ } else if (VRM.isReMaterialized(VirtReg))
+ continue;
+ int SS = VRM.getStackSlot(VirtReg);
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ if (PhysReg) {
+ if (MRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ continue;
+ }
+ PhysReg = VRM.getPhys(VirtReg);
+ if (!MRI->regsOverlap(PhysReg, UnfoldPR))
+ continue;
+
+ // Ok, we'll need to reload the value into a register which makes
+ // it impossible to perform the store unfolding optimization later.
+ // Let's see if it is possible to fold the load if the store is
+ // unfolded. This allows us to perform the store unfolding
+ // optimization.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (MRI->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
+ assert(NewMIs.size() == 1);
+ MachineInstr *NewMI = NewMIs.back();
+ NewMIs.clear();
+ unsigned Idx = NewMI->findRegisterUseOperandIdx(VirtReg);
+ MachineInstr *FoldedMI = MRI->foldMemoryOperand(NewMI, Idx, SS);
+ if (FoldedMI) {
+ if (VRM.hasPhys(UnfoldVR))
+ assert(VRM.getPhys(UnfoldVR) == UnfoldPR);
+ else
+ VRM.assignVirt2Phys(UnfoldVR, UnfoldPR);
+
+ VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ MII = MBB.insert(MII, FoldedMI);
+ VRM.RemoveFromFoldedVirtMap(&MI);
+ MBB.erase(&MI);
+ return true;
+ }
+ delete NewMI;
+ }
+ }
+ return false;
+}
/// RewriteMBB - Keep track of which spills are available even after the
/// register allocator is done with them. If possible, avoid reloading vregs.
@@ -754,28 +868,21 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MII != E; ) {
- MachineInstr &MI = *MII;
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
bool Erased = false;
bool BackTracked = false;
+ if (PrepForUnfoldOpti(MBB, MII,
+ MaybeDeadStores, Spills, RegKills, KillOps, VRM))
+ NextMII = next(MII);
/// ReusedOperands - Keep track of operand reuse in case we need to undo
/// reuse.
+ MachineInstr &MI = *MII;
ReuseInfo ReusedOperands(MI, MRI);
- // Loop over all of the implicit defs, clearing them from our available
- // sets.
const TargetInstrDescriptor *TID = MI.getInstrDescriptor();
- if (TID->ImplicitDefs) {
- const unsigned *ImpDef = TID->ImplicitDefs;
- for ( ; *ImpDef; ++ImpDef) {
- MF.setPhysRegUsed(*ImpDef);
- ReusedOperands.markClobbered(*ImpDef);
- Spills.ClobberPhysReg(*ImpDef);
- }
- }
// Process all of the spilled uses and all non spilled reg references.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
@@ -788,7 +895,6 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// Ignore physregs for spilling, but remember that it is used by this
// function.
MF.setPhysRegUsed(VirtReg);
- ReusedOperands.markClobbered(VirtReg);
continue;
}
@@ -826,7 +932,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
if (!PhysReg && DoReMat) {
// This use is rematerializable. But perhaps the value is available in
- // stack if the definition is not deleted. If so, check if we can
+ // a register if the definition is not deleted. If so, check if we can
// reuse the value.
ReuseSlot = VRM.getStackSlot(VirtReg);
if (ReuseSlot != VirtRegMap::NO_STACK_SLOT)
@@ -857,7 +963,6 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// aren't allowed to modify the reused register. If none of these cases
// apply, reuse it.
bool CanReuse = true;
-
int ti = TID->getOperandConstraint(i, TOI::TIED_TO);
if (ti != -1 &&
MI.getOperand(ti).isRegister() &&
@@ -911,8 +1016,8 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
if (DeadStore) {
DOUT << "Removed dead store:\t" << *DeadStore;
InvalidateKills(*DeadStore, RegKills, KillOps);
- MBB.erase(DeadStore);
VRM.RemoveFromFoldedVirtMap(DeadStore);
+ MBB.erase(DeadStore);
MaybeDeadStores[ReuseSlot] = NULL;
++NumDSE;
}
@@ -977,7 +1082,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
DOUT << '\t' << *prior(MII);
++NumReused;
continue;
- } // is (PhysReg)
+ } // if (PhysReg)
// Otherwise, reload it and remember that we have it.
PhysReg = VRM.getPhys(VirtReg);
@@ -1023,12 +1128,11 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// If we have folded references to memory operands, make sure we clear all
// physical registers that may contain the value of the spilled virtual
// register
- SmallSet<int, 1> FoldedSS;
+ SmallSet<int, 2> FoldedSS;
for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
- DOUT << "Folded vreg: " << I->second.first << " MR: "
- << I->second.second;
unsigned VirtReg = I->second.first;
VirtRegMap::ModRef MR = I->second.second;
+ DOUT << "Folded vreg: " << VirtReg << " MR: " << MR;
if (VRM.isAssignedReg(VirtReg)) {
DOUT << ": No stack slot!\n";
continue;
@@ -1084,9 +1188,9 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// Otherwise, the store to this stack slot is not dead anymore.
MachineInstr* DeadStore = MaybeDeadStores[SS];
if (DeadStore) {
- bool isDead = true;
+ bool isDead = !(MR & VirtRegMap::isRef);
MachineInstr *NewStore = NULL;
- if (MR & VirtRegMap::isRef) {
+ if (MR & VirtRegMap::isMod) {
unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
SmallVector<MachineInstr*, 4> NewMIs;
if (PhysReg &&
@@ -1101,8 +1205,8 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
--NextMII;
--NextMII; // backtrack to the unfolded instruction.
BackTracked = true;
- } else
- isDead = false;
+ isDead = true;
+ }
}
if (isDead) { // Previous store is dead.
@@ -1156,132 +1260,132 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// Process all of the spilled defs.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isRegister() && MO.getReg() && MO.isDef()) {
- unsigned VirtReg = MO.getReg();
+ if (!(MO.isRegister() && MO.getReg() && MO.isDef()))
+ continue;
- if (!MRegisterInfo::isVirtualRegister(VirtReg)) {
- // Check to see if this is a noop copy. If so, eliminate the
- // instruction before considering the dest reg to be changed.
- unsigned Src, Dst;
- if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
- ++NumDCE;
- DOUT << "Removing now-noop copy: " << MI;
- MBB.erase(&MI);
- Erased = true;
- VRM.RemoveFromFoldedVirtMap(&MI);
- Spills.disallowClobberPhysReg(VirtReg);
- goto ProcessNextInst;
- }
+ unsigned VirtReg = MO.getReg();
+ if (!MRegisterInfo::isVirtualRegister(VirtReg)) {
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ unsigned Src, Dst;
+ if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
+ ++NumDCE;
+ DOUT << "Removing now-noop copy: " << MI;
+ MBB.erase(&MI);
+ Erased = true;
+ VRM.RemoveFromFoldedVirtMap(&MI);
+ Spills.disallowClobberPhysReg(VirtReg);
+ goto ProcessNextInst;
+ }
- // If it's not a no-op copy, it clobbers the value in the destreg.
- Spills.ClobberPhysReg(VirtReg);
- ReusedOperands.markClobbered(VirtReg);
+ // If it's not a no-op copy, it clobbers the value in the destreg.
+ Spills.ClobberPhysReg(VirtReg);
+ ReusedOperands.markClobbered(VirtReg);
- // Check to see if this instruction is a load from a stack slot into
- // a register. If so, this provides the stack slot value in the reg.
- int FrameIdx;
- if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
- assert(DestReg == VirtReg && "Unknown load situation!");
-
- // If it is a folded reference, then it's not safe to clobber.
- bool Folded = FoldedSS.count(FrameIdx);
- // Otherwise, if it wasn't available, remember that it is now!
- Spills.addAvailable(FrameIdx, &MI, DestReg, !Folded);
- goto ProcessNextInst;
- }
-
- continue;
+ // Check to see if this instruction is a load from a stack slot into
+ // a register. If so, this provides the stack slot value in the reg.
+ int FrameIdx;
+ if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+ assert(DestReg == VirtReg && "Unknown load situation!");
+
+ // If it is a folded reference, then it's not safe to clobber.
+ bool Folded = FoldedSS.count(FrameIdx);
+ // Otherwise, if it wasn't available, remember that it is now!
+ Spills.addAvailable(FrameIdx, &MI, DestReg, !Folded);
+ goto ProcessNextInst;
}
+
+ continue;
+ }
- bool DoReMat = VRM.isReMaterialized(VirtReg);
- if (DoReMat)
- ReMatDefs.insert(&MI);
-
- // The only vregs left are stack slot definitions.
- int StackSlot = VRM.getStackSlot(VirtReg);
- const TargetRegisterClass *RC = RegMap->getRegClass(VirtReg);
-
- // If this def is part of a two-address operand, make sure to execute
- // the store from the correct physical register.
- unsigned PhysReg;
- int TiedOp = MI.getInstrDescriptor()->findTiedToSrcOperand(i);
- if (TiedOp != -1)
- PhysReg = MI.getOperand(TiedOp).getReg();
- else {
- PhysReg = VRM.getPhys(VirtReg);
- if (ReusedOperands.isClobbered(PhysReg)) {
- // Another def has taken the assigned physreg. It must have been a
- // use&def which got it due to reuse. Undo the reuse!
- PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ if (DoReMat)
+ ReMatDefs.insert(&MI);
+
+ // The only vregs left are stack slot definitions.
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass *RC = RegMap->getRegClass(VirtReg);
+
+ // If this def is part of a two-address operand, make sure to execute
+ // the store from the correct physical register.
+ unsigned PhysReg;
+ int TiedOp = MI.getInstrDescriptor()->findTiedToSrcOperand(i);
+ if (TiedOp != -1)
+ PhysReg = MI.getOperand(TiedOp).getReg();
+ else {
+ PhysReg = VRM.getPhys(VirtReg);
+ if (ReusedOperands.isClobbered(PhysReg)) {
+ // Another def has taken the assigned physreg. It must have been a
+ // use&def which got it due to reuse. Undo the reuse!
+ PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
Spills, MaybeDeadStores, RegKills, KillOps, VRM);
- }
}
+ }
- MF.setPhysRegUsed(PhysReg);
- ReusedOperands.markClobbered(PhysReg);
- MI.getOperand(i).setReg(PhysReg);
- if (!MO.isDead()) {
- MRI->storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC);
- DOUT << "Store:\t" << *next(MII);
-
- // If there is a dead store to this stack slot, nuke it now.
- MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
- if (LastStore) {
- DOUT << "Removed dead store:\t" << *LastStore;
- ++NumDSE;
- SmallVector<unsigned, 1> KillRegs;
- InvalidateKills(*LastStore, RegKills, KillOps, &KillRegs);
- MachineBasicBlock::iterator PrevMII = LastStore;
- bool CheckDef = PrevMII != MBB.begin();
- if (CheckDef)
- --PrevMII;
- MBB.erase(LastStore);
- VRM.RemoveFromFoldedVirtMap(LastStore);
- if (CheckDef) {
- // Look at defs of killed registers on the store. Mark the defs
- // as dead since the store has been deleted and they aren't
- // being reused.
- for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
- bool HasOtherDef = false;
- if (InvalidateRegDef(PrevMII, MI, KillRegs[j], HasOtherDef)) {
- MachineInstr *DeadDef = PrevMII;
- if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
- // FIXME: This assumes a remat def does not have side
- // effects.
- MBB.erase(DeadDef);
- VRM.RemoveFromFoldedVirtMap(DeadDef);
- ++NumDRM;
- }
+ MF.setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ MI.getOperand(i).setReg(PhysReg);
+ if (!MO.isDead()) {
+ MRI->storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC);
+ DOUT << "Store:\t" << *next(MII);
+
+ // If there is a dead store to this stack slot, nuke it now.
+ MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+ if (LastStore) {
+ DOUT << "Removed dead store:\t" << *LastStore;
+ ++NumDSE;
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(*LastStore, RegKills, KillOps, &KillRegs);
+ MachineBasicBlock::iterator PrevMII = LastStore;
+ bool CheckDef = PrevMII != MBB.begin();
+ if (CheckDef)
+ --PrevMII;
+ MBB.erase(LastStore);
+ VRM.RemoveFromFoldedVirtMap(LastStore);
+ if (CheckDef) {
+ // Look at defs of killed registers on the store. Mark the defs
+ // as dead since the store has been deleted and they aren't
+ // being reused.
+ for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+ bool HasOtherDef = false;
+ if (InvalidateRegDef(PrevMII, MI, KillRegs[j], HasOtherDef)) {
+ MachineInstr *DeadDef = PrevMII;
+ if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+ // FIXME: This assumes a remat def does not have side
+ // effects.
+ MBB.erase(DeadDef);
+ VRM.RemoveFromFoldedVirtMap(DeadDef);
+ ++NumDRM;
}
}
}
}
- LastStore = next(MII);
-
- // If the stack slot value was previously available in some other
- // register, change it now. Otherwise, make the register available,
- // in PhysReg.
- Spills.ModifyStackSlotOrReMat(StackSlot);
- Spills.ClobberPhysReg(PhysReg);
- Spills.addAvailable(StackSlot, LastStore, PhysReg);
- ++NumStores;
-
- // Check to see if this is a noop copy. If so, eliminate the
- // instruction before considering the dest reg to be changed.
- {
- unsigned Src, Dst;
- if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
- ++NumDCE;
- DOUT << "Removing now-noop copy: " << MI;
- MBB.erase(&MI);
- Erased = true;
- VRM.RemoveFromFoldedVirtMap(&MI);
- UpdateKills(*LastStore, RegKills, KillOps);
- goto ProcessNextInst;
- }
+ }
+ LastStore = next(MII);
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register available,
+ // in PhysReg.
+ Spills.ModifyStackSlotOrReMat(StackSlot);
+ Spills.ClobberPhysReg(PhysReg);
+ Spills.addAvailable(StackSlot, LastStore, PhysReg);
+ ++NumStores;
+
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ {
+ unsigned Src, Dst;
+ if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
+ ++NumDCE;
+ DOUT << "Removing now-noop copy: " << MI;
+ MBB.erase(&MI);
+ Erased = true;
+ VRM.RemoveFromFoldedVirtMap(&MI);
+ UpdateKills(*LastStore, RegKills, KillOps);
+ goto ProcessNextInst;
}
- }
- }
+ }
+ }
}
ProcessNextInst:
if (!Erased && !BackTracked)
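The reworked dead-store test in RewriteMBB hinges on VirtRegMap's ModRef bits. A self-contained sketch of that logic; the bit values are illustrative and what matters is that isRef and isMod are independent bits, with isModRef their union. (One simplification: in RewriteMBB a successful store-unfold re-marks the store dead even when the slot is read.)

    #include <cstdio>

    // ModRef as VirtRegMap uses it: a folded reference may read (isRef),
    // write (isMod), or both (isModRef) its stack slot.
    enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };

    // A prior store to the slot stays dead only if this instruction does not
    // read the slot...
    bool previousStoreIsDead(ModRef MR) { return !(MR & isRef); }
    // ...and attempting the store-unfold is only worthwhile if it writes it.
    bool tryUnfoldForDSE(ModRef MR)     { return MR & isMod; }

    int main() {
      std::printf("%d %d\n", previousStoreIsDead(isMod),    tryUnfoldForDSE(isMod));    // 1 1
      std::printf("%d %d\n", previousStoreIsDead(isRef),    tryUnfoldForDSE(isRef));    // 0 0
      std::printf("%d %d\n", previousStoreIsDead(isModRef), tryUnfoldForDSE(isModRef)); // 0 1
    }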
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 6302024..e9f9bd6 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1062,9 +1062,11 @@ let isTwoAddress = 0, CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst)]>;
def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst)]>, OpSize;
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst)]>,
+ OpSize, Requires<[In32BitMode]>;
def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst)]>;
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst)]>,
+ Requires<[In32BitMode]>;
}
let CodeSize = 2 in
@@ -1082,9 +1084,11 @@ let isTwoAddress = 0, CodeSize = 2 in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst)]>;
def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst)]>, OpSize;
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst)]>,
+ OpSize, Requires<[In32BitMode]>;
def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst)]>;
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst)]>,
+ Requires<[In32BitMode]>;
}
} // Defs = [EFLAGS]
diff --git a/lib/Target/X86/X86InstrX86-64.td b/lib/Target/X86/X86InstrX86-64.td
index 077e9dc..42863d4 100644
--- a/lib/Target/X86/X86InstrX86-64.td
+++ b/lib/Target/X86/X86InstrX86-64.td
@@ -461,6 +461,23 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst"
[(set GR32:$dst, (add GR32:$src, -1))]>,
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress
+
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+let isTwoAddress = 0, CodeSize = 2 in {
+ def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst)]>,
+ OpSize, Requires<[In64BitMode]>;
+ def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst)]>,
+ Requires<[In64BitMode]>;
+ def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst)]>,
+ OpSize, Requires<[In64BitMode]>;
+ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst)]>,
+ Requires<[In64BitMode]>;
+}
} // Defs = [EFLAGS], CodeSize
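Why the duplicates: in 64-bit mode the short one-byte inc/dec encodings are reused as REX prefixes, which is why distinct register forms (INC64_16r, DEC64_32r, ...) already exist; this patch mirrors that split onto the memory forms and predicates each on its mode. The payoff shows up in X86RegisterInfo.cpp below: the reg-to-mem fold table becomes one-to-one, so the derived mem-to-reg unfold table is unambiguous. A toy sketch of that invertibility point (the opcode names are stand-in tags):

    #include <cstdio>
    #include <map>

    enum Opc { DEC16r, DEC64_16r, DEC16m, DEC64_16m };  // illustrative tags

    int main() {
      // Fold table: register form -> memory form. Before this patch both
      // DEC16r and DEC64_16r mapped to DEC16m, so inverting the table to get
      // an unfold map would lose one of them.
      std::map<Opc, Opc> fold = {{DEC16r, DEC16m}, {DEC64_16r, DEC64_16m}};

      std::map<Opc, Opc> unfold;            // memory form -> register form
      for (const auto &p : fold)
        unfold[p.second] = p.first;         // well-defined: values are unique

      std::printf("unfold entries: %zu\n", unfold.size());  // prints 2
    }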
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index e88c050..4828067 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -97,14 +97,14 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
{ X86::AND8rr, X86::AND8mr },
{ X86::DEC16r, X86::DEC16m },
{ X86::DEC32r, X86::DEC32m },
- { X86::DEC64_16r, X86::DEC16m },
- { X86::DEC64_32r, X86::DEC32m },
+ { X86::DEC64_16r, X86::DEC64_16m },
+ { X86::DEC64_32r, X86::DEC64_32m },
{ X86::DEC64r, X86::DEC64m },
{ X86::DEC8r, X86::DEC8m },
{ X86::INC16r, X86::INC16m },
{ X86::INC32r, X86::INC32m },
- { X86::INC64_16r, X86::INC16m },
- { X86::INC64_32r, X86::INC32m },
+ { X86::INC64_16r, X86::INC64_16m },
+ { X86::INC64_32r, X86::INC64_32m },
{ X86::INC64r, X86::INC64m },
{ X86::INC8r, X86::INC8m },
{ X86::NEG16r, X86::NEG16m },
@@ -981,10 +981,9 @@ void X86RegisterInfo::reMaterialize(MachineBasicBlock &MBB,
static MachineInstr *FuseTwoAddrInst(unsigned Opcode,
SmallVector<MachineOperand,4> &MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
- unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2;
-
// Create the base instruction with the memory operand as the first part.
- MachineInstrBuilder MIB = BuildMI(TII.get(Opcode));
+ MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true);
+ MachineInstrBuilder MIB(NewMI);
unsigned NumAddrOps = MOs.size();
for (unsigned i = 0; i != NumAddrOps; ++i)
MIB = X86InstrAddOperand(MIB, MOs[i]);
@@ -992,17 +991,23 @@ static MachineInstr *FuseTwoAddrInst(unsigned Opcode,
MIB.addImm(1).addReg(0).addImm(0);
// Loop over the rest of the ri operands, converting them over.
+ unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2;
for (unsigned i = 0; i != NumOps; ++i) {
MachineOperand &MO = MI->getOperand(i+2);
MIB = X86InstrAddOperand(MIB, MO);
}
+ for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ MIB = X86InstrAddOperand(MIB, MO);
+ }
return MIB;
}
static MachineInstr *FuseInst(unsigned Opcode, unsigned OpNo,
SmallVector<MachineOperand,4> &MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
- MachineInstrBuilder MIB = BuildMI(TII.get(Opcode));
+ MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true);
+ MachineInstrBuilder MIB(NewMI);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -1036,7 +1041,6 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
MachineInstr*
X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned i,
SmallVector<MachineOperand,4> &MOs) const {
- // Table (and size) to search
const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
bool isTwoAddrFold = false;
unsigned NumOps = TII.getNumOperands(MI->getOpcode());
@@ -1117,6 +1121,49 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned OpNu
return foldMemoryOperand(MI, OpNum, MOs);
}
+unsigned X86RegisterInfo::getOpcodeAfterMemoryFold(unsigned Opc,
+ unsigned OpNum) const {
+ // Check switch flag
+ if (NoFusing) return 0;
+ const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+ unsigned NumOps = TII.getNumOperands(Opc);
+ bool isTwoAddr = NumOps > 1 &&
+ TII.getOperandConstraint(Opc, 1, TOI::TIED_TO) != -1;
+
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different from folding it in other places. It requires
+ // replacing the *two* registers with the memory location.
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ } else if (OpNum == 0) { // If operand 0
+ switch (Opc) {
+ case X86::MOV16r0:
+ return X86::MOV16mi;
+ case X86::MOV32r0:
+ return X86::MOV32mi;
+ case X86::MOV64r0:
+ return X86::MOV64mi32;
+ case X86::MOV8r0:
+ return X86::MOV8mi;
+ default: break;
+ }
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (OpNum == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (OpNum == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ DenseMap<unsigned*, unsigned>::iterator I =
+ OpcodeTablePtr->find((unsigned*)Opc);
+ if (I != OpcodeTablePtr->end())
+ return I->second;
+ }
+ return 0;
+}
+
bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
@@ -1126,14 +1173,14 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
return false;
unsigned Opc = I->second.first;
unsigned Index = I->second.second & 0xf;
- bool HasLoad = I->second.second & (1 << 4);
- bool HasStore = I->second.second & (1 << 5);
- if (UnfoldLoad && !HasLoad)
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
return false;
- HasLoad &= UnfoldLoad;
- if (UnfoldStore && !HasStore)
+ UnfoldLoad &= FoldedLoad;
+ if (UnfoldStore && !FoldedStore)
return false;
- HasStore &= UnfoldStore;
+ UnfoldStore &= FoldedStore;
const TargetInstrDescriptor &TID = TII.get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
@@ -1156,9 +1203,9 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
}
// Emit the load instruction.
- if (HasLoad) {
+ if (UnfoldLoad) {
loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
- if (HasStore) {
+ if (UnfoldStore) {
// Address operands cannot be marked isKill.
for (unsigned i = 1; i != 5; ++i) {
MachineOperand &MO = NewMIs[0]->getOperand(i);
@@ -1169,15 +1216,11 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
}
// Emit the data processing instruction.
- MachineInstr *DataMI = new MachineInstr (TID, true);
+ MachineInstr *DataMI = new MachineInstr(TID, true);
MachineInstrBuilder MIB(DataMI);
- const TargetRegisterClass *DstRC = 0;
- if (HasStore) {
- const TargetOperandInfo &DstTOI = TID.OpInfo[0];
- DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
- ? TII.getPointerRegClass() : getRegClass(DstTOI.RegClass);
+
+ if (FoldedStore)
MIB.addReg(Reg, true);
- }
for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
MIB = X86InstrAddOperand(MIB, BeforeOps[i]);
MIB.addReg(Reg);
@@ -1190,8 +1233,12 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
NewMIs.push_back(MIB);
// Emit the store instruction.
- if (HasStore)
+ if (UnfoldStore) {
+ const TargetOperandInfo &DstTOI = TID.OpInfo[0];
+ const TargetRegisterClass *DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
+ ? TII.getPointerRegClass() : getRegClass(DstTOI.RegClass);
storeRegToAddr(MF, Reg, AddrOps, DstRC, NewMIs);
+ }
return true;
}
@@ -1209,8 +1256,8 @@ X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
return false;
unsigned Opc = I->second.first;
unsigned Index = I->second.second & 0xf;
- bool HasLoad = I->second.second & (1 << 4);
- bool HasStore = I->second.second & (1 << 5);
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
const TargetInstrDescriptor &TID = TII.get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
const TargetRegisterClass *RC = (TOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
@@ -1233,7 +1280,7 @@ X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// Emit the load instruction.
SDNode *Load = 0;
- if (HasLoad) {
+ if (FoldedLoad) {
MVT::ValueType VT = *RC->vt_begin();
Load = DAG.getTargetNode(getLoadRegOpcode(RC), VT, MVT::Other,
&AddrOps[0], AddrOps.size());
@@ -1261,7 +1308,7 @@ X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
NewNodes.push_back(NewNode);
// Emit the store instruction.
- if (HasStore) {
+ if (FoldedStore) {
AddrOps.pop_back();
AddrOps.push_back(SDOperand(NewNode, 0));
AddrOps.push_back(Chain);
@@ -1279,11 +1326,11 @@ unsigned X86RegisterInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
MemOp2RegOpTable.find((unsigned*)Opc);
if (I == MemOp2RegOpTable.end())
return 0;
- bool HasLoad = I->second.second & (1 << 4);
- bool HasStore = I->second.second & (1 << 5);
- if (UnfoldLoad && !HasLoad)
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
return 0;
- if (UnfoldStore && !HasStore)
+ if (UnfoldStore && !FoldedStore)
return 0;
return I->second.first;
}
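The FoldedLoad / FoldedStore renaming above also documents the bit-packing these tables use: the low nibble of each MemOp2RegOpTable value is the operand index the memory operand was folded at, bit 4 records a folded load, and bit 5 a folded store. A self-contained decoding sketch:

    #include <cstdio>

    struct UnfoldInfo {
      unsigned Index;      // operand index the memory operand was folded at
      bool FoldedLoad;     // the folded memory access included a load
      bool FoldedStore;    // ...and/or a store
    };

    UnfoldInfo decode(unsigned Packed) {
      UnfoldInfo UI;
      UI.Index       = Packed & 0xf;
      UI.FoldedLoad  = Packed & (1 << 4);
      UI.FoldedStore = Packed & (1 << 5);
      return UI;
    }

    int main() {
      // e.g. a load+store fold at operand 0, as for "incl (%eax)".
      UnfoldInfo UI = decode(0u | (1 << 4) | (1 << 5));
      std::printf("idx=%u load=%d store=%d\n", UI.Index, UI.FoldedLoad, UI.FoldedStore);
    }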
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index c21868c..a17296b 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -132,6 +132,12 @@ public:
unsigned OpNum,
MachineInstr* LoadMI) const;
+ /// getOpcodeAfterMemoryFold - Returns the opcode of the would-be new
+ /// instruction after a load / store is folded into an instruction of the
+ /// specified opcode. It returns zero if the specified folding is not
+ /// possible.
+ unsigned getOpcodeAfterMemoryFold(unsigned Opc, unsigned OpNum) const;
+
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instructions. If this is
/// possible, returns true as well as the new instructions by reference.
@@ -143,8 +149,9 @@ public:
SmallVectorImpl<SDNode*> &NewNodes) const;
/// getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new
- /// instruction after load / store are unfolded from the specified opcode.
- /// It returns zero if the specified unfolding is impossible.
+ /// instruction after load / store are unfolded from an instruction of the
+ /// specified opcode. It returns zero if the specified unfolding is not
+ /// possible.
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore) const;