aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2007-12-01 02:07:52 +0000
committerEvan Cheng <evan.cheng@apple.com>2007-12-01 02:07:52 +0000
commitff52f082846db6a31d73cb6f02f62401e20eda6f (patch)
tree5d910203fd66c0b5594d9f29a069ae8125283566
parent8b274478b4006a9098a9fd28625c80dc82bd9e2a (diff)
downloadexternal_llvm-ff52f082846db6a31d73cb6f02f62401e20eda6f.zip
external_llvm-ff52f082846db6a31d73cb6f02f62401e20eda6f.tar.gz
external_llvm-ff52f082846db6a31d73cb6f02f62401e20eda6f.tar.bz2
Allow some reloads to be folded in multi-use cases. Specifically testl r, r -> cmpl [mem], 0.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44479 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/CodeGen/LiveIntervalAnalysis.h3
-rw-r--r--include/llvm/Target/MRegisterInfo.h16
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp54
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.h12
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.h12
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp6
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h16
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h12
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h12
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp75
-rw-r--r--lib/Target/X86/X86RegisterInfo.h13
-rw-r--r--test/CodeGen/X86/2007-08-13-SpillerReuse.ll2
-rw-r--r--test/CodeGen/X86/2007-11-30-TestLoadFolding.ll58
13 files changed, 259 insertions, 32 deletions
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 725beb7..5ada1ad 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -275,7 +275,8 @@ namespace llvm {
/// returns true.
bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm,
MachineInstr *DefMI, unsigned InstrIdx,
- unsigned OpIdx, unsigned NumUses,
+ unsigned OpIdx,
+ SmallVector<unsigned, 2> &UseOps,
bool isSS, int Slot, unsigned Reg);
/// anyKillInMBBAfterIdx - Returns true if there is a kill of the specified
diff --git a/include/llvm/Target/MRegisterInfo.h b/include/llvm/Target/MRegisterInfo.h
index bff74de..ecbee64 100644
--- a/include/llvm/Target/MRegisterInfo.h
+++ b/include/llvm/Target/MRegisterInfo.h
@@ -543,6 +543,14 @@ public:
return 0;
}
+ /// foldMemoryOperand - Same as previous except it tries to fold instruction
+ /// with multiple uses of the same register.
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ int FrameIndex) const {
+ return 0; // Default implementation folds nothing and returns null.
+ }
+
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
@@ -552,6 +560,14 @@ public:
return 0;
}
+ /// foldMemoryOperand - Same as previous except it tries to fold instruction
+ /// with multiple uses of the same register.
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ MachineInstr* LoadMI) const {
+ return 0; // Default implementation folds nothing and returns null.
+ }
+
/// getOpcodeAfterMemoryFold - Returns the opcode of the would be new
/// instruction after load / store is folded into an instruction of the
/// specified opcode. It returns zero if the specified unfolding is not
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index f902f4b..2b22faf 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -644,20 +644,27 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
VirtRegMap &vrm, MachineInstr *DefMI,
unsigned InstrIdx, unsigned OpIdx,
- unsigned NumUses,
+ SmallVector<unsigned, 2> &UseOps,
bool isSS, int Slot, unsigned Reg) {
// FIXME: fold subreg use
if (MI->getOperand(OpIdx).getSubReg())
return false;
- // FIXME: It may be possible to fold load when there are multiple uses.
- // e.g. On x86, TEST32rr r, r -> CMP32rm [mem], 0
- if (NumUses > 1)
- return false;
+ MachineInstr *fmi = NULL;
+
+ if (UseOps.size() < 2)
+ fmi = isSS ? mri_->foldMemoryOperand(MI, OpIdx, Slot)
+ : mri_->foldMemoryOperand(MI, OpIdx, DefMI);
+ else {
+ if (OpIdx != UseOps[0])
+ // Must be two-address instruction + one more use. Not going to fold.
+ return false;
+ // It may be possible to fold load when there are multiple uses.
+ // e.g. On x86, TEST32rr r, r -> CMP32rm [mem], 0
+ fmi = isSS ? mri_->foldMemoryOperand(MI, UseOps, Slot)
+ : mri_->foldMemoryOperand(MI, UseOps, DefMI);
+ }
- MachineInstr *fmi = isSS
- ? mri_->foldMemoryOperand(MI, OpIdx, Slot)
- : mri_->foldMemoryOperand(MI, OpIdx, DefMI);
if (fmi) {
// Attempt to fold the memory reference into the instruction. If
// we can do this, we don't need to insert spill code.
@@ -768,7 +775,9 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
HasUse = mop.isUse();
HasDef = mop.isDef();
- unsigned NumUses = HasUse;
+ SmallVector<unsigned, 2> UseOps;
+ if (HasUse)
+ UseOps.push_back(i);
std::vector<unsigned> UpdateOps;
for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
if (!MI->getOperand(j).isRegister())
@@ -779,7 +788,7 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
if (RegJ == RegI) {
UpdateOps.push_back(j);
if (MI->getOperand(j).isUse())
- ++NumUses;
+ UseOps.push_back(j);
HasUse |= MI->getOperand(j).isUse();
HasDef |= MI->getOperand(j).isDef();
}
@@ -787,7 +796,7 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
if (TryFold &&
tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, i,
- NumUses, FoldSS, FoldSlot, Reg)) {
+ UseOps, FoldSS, FoldSlot, Reg)) {
// Folding the load/store can completely change the instruction in
// unpredictable ways, rescan it from the beginning.
HasUse = false;
@@ -1207,6 +1216,7 @@ addIntervalsForSpills(const LiveInterval &li,
if (!TrySplit)
return NewLIs;
+ SmallVector<unsigned, 2> UseOps;
if (NeedStackSlot) {
int Id = SpillMBBs.find_first();
while (Id != -1) {
@@ -1217,7 +1227,7 @@ addIntervalsForSpills(const LiveInterval &li,
bool isReMat = vrm.isReMaterialized(VReg);
MachineInstr *MI = getInstructionFromIndex(index);
int OpIdx = -1;
- unsigned NumUses = 0;
+ UseOps.clear();
if (spills[i].canFold) {
for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
MachineOperand &MO = MI->getOperand(j);
@@ -1230,20 +1240,20 @@ addIntervalsForSpills(const LiveInterval &li,
// Can't fold if it's two-address code and the use isn't the
// first and only use.
if (isReMat ||
- (NumUses == 0 && !alsoFoldARestore(Id, index, VReg, RestoreMBBs,
- RestoreIdxes))) {
+ (UseOps.empty() && !alsoFoldARestore(Id, index, VReg,
+ RestoreMBBs, RestoreIdxes))) {
OpIdx = -1;
break;
}
- ++NumUses;
+ UseOps.push_back(j);
}
}
// Fold the store into the def if possible.
bool Folded = false;
if (OpIdx != -1) {
- if (tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, NumUses,
+ if (tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, UseOps,
true, Slot, VReg)) {
- if (NumUses)
+ if (!UseOps.empty())
// Folded a two-address instruction, do not issue a load.
eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
Folded = true;
@@ -1267,8 +1277,8 @@ addIntervalsForSpills(const LiveInterval &li,
continue;
unsigned VReg = restores[i].vreg;
MachineInstr *MI = getInstructionFromIndex(index);
- unsigned NumUses = 0;
int OpIdx = -1;
+ UseOps.clear();
if (restores[i].canFold) {
for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
MachineOperand &MO = MI->getOperand(j);
@@ -1280,10 +1290,10 @@ addIntervalsForSpills(const LiveInterval &li,
OpIdx = -1;
break;
}
- if (NumUses == 0)
+ if (UseOps.empty())
// Use the first use index.
OpIdx = (int)j;
- ++NumUses;
+ UseOps.push_back(j);
}
}
@@ -1298,9 +1308,9 @@ addIntervalsForSpills(const LiveInterval &li,
if (isLoadSS ||
(ReMatDefMI->getInstrDescriptor()->Flags & M_LOAD_FLAG))
Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, OpIdx,
- NumUses, isLoadSS, LdSlot, VReg);
+ UseOps, isLoadSS, LdSlot, VReg);
} else
- Folded = tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, NumUses,
+ Folded = tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, UseOps,
true, Slot, VReg);
}
// If folding is not possible / failed, then tell the spiller to issue a
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index b1110db..97be04f 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -77,11 +77,23 @@ public:
MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
int FrameIndex) const;
+ MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ int FrameIndex) const {
+ return 0; // Multi-use stack-slot overload: always null, nothing is folded.
+ }
+
MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
MachineInstr* LoadMI) const {
return 0;
}
+ MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ MachineInstr* LoadMI) const {
+ return 0; // Multi-use LoadMI overload: always null, nothing is folded.
+ }
+
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const TargetRegisterClass* const*
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index 6275cb4..97d3280 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -51,11 +51,23 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
MachineInstr* foldMemoryOperand(MachineInstr *MI, unsigned OpNum,
int FrameIndex) const;
+ MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ int FrameIndex) const {
+ return 0; // Multi-use stack-slot overload: always null, nothing is folded.
+ }
+
MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
MachineInstr* LoadMI) const {
return 0;
}
+ MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ MachineInstr* LoadMI) const {
+ return 0; // Multi-use LoadMI overload: always null, nothing is folded.
+ }
+
void copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 9dc8563..94cf59b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -203,12 +203,6 @@ foldMemoryOperand(MachineInstr* MI, unsigned OpNum, int FI) const
return NewMI;
}
-MachineInstr *MipsRegisterInfo::
-foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
- MachineInstr* LoadMI) const {
- return NULL;
-}
-
//===----------------------------------------------------------------------===//
//
// Callee Saved Registers methods
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 636ffb9..123f6e8 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -58,8 +58,22 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
int FrameIndex) const;
+ MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ int FrameIndex) const {
+ return 0; // Multi-use stack-slot overload: always null, nothing is folded.
+ }
+
MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
- MachineInstr* LoadMI) const;
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ MachineInstr* LoadMI) const {
+ return 0; // Multi-use LoadMI overload: always null, nothing is folded.
+ }
void copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 776d11c..3fce892 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -68,11 +68,23 @@ public:
virtual MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
int FrameIndex) const;
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ int FrameIndex) const {
+ return 0; // Multi-use stack-slot overload: always null, nothing is folded.
+ }
+
virtual MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
MachineInstr* LoadMI) const {
return 0;
}
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ MachineInstr* LoadMI) const {
+ return 0; // Multi-use LoadMI overload: always null, nothing is folded.
+ }
+
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const TargetRegisterClass* const*
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index dec01e0..cecbc8a 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -63,11 +63,23 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
int FrameIndex) const;
virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ int FrameIndex) const {
+ return 0; // Multi-use stack-slot overload: always null, nothing is folded.
+ }
+
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
unsigned OpNum,
MachineInstr* LoadMI) const {
return 0;
}
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ MachineInstr* LoadMI) const {
+ return 0; // Multi-use LoadMI overload: always null, nothing is folded.
+ }
+
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const TargetRegisterClass* const* getCalleeSavedRegClasses(
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 25b6375..29f401a 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -1149,6 +1149,31 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned OpNu
return foldMemoryOperand(MI, OpNum, MOs);
}
+/// foldMemoryOperand - Multi-use variant of the stack-slot fold. UseOps holds
+/// the operand indices of MI that use the register being folded. A single
+/// use is forwarded to the one-operand overload. The only two-use shape
+/// handled is TESTxxrr r, r (uses at operands 0 and 1), which is rewritten in
+/// place to CMPxxri r, 0 so that operand 0 can then be folded. Returns NULL
+/// when NoFusing is set or the instruction does not match that shape.
+MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
+                                           SmallVectorImpl<unsigned> &UseOps,
+                                                 int FrameIndex) const {
+  // Check switch flag
+  if (NoFusing) return NULL;
+
+  if (UseOps.size() == 1)
+    return foldMemoryOperand(MI, UseOps[0], FrameIndex);
+  // Exactly two uses, at operands 0 and 1, are required. Reject as soon as
+  // either index differs (the test must be '||', not '&&').
+  if (UseOps.size() != 2 || UseOps[0] != 0 || UseOps[1] != 1)
+    return NULL;
+
+  unsigned NewOpc = 0;
+  switch (MI->getOpcode()) {
+  default: return NULL;
+  case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
+  case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
+  case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
+  case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+  }
+  // Change to CMPXXri r, 0 first.
+  // NOTE(review): MI is modified in place before the fold is attempted; if
+  // the fold below returns NULL, MI stays as CMPxxri r, 0. Confirm callers
+  // tolerate operand 1 having become an immediate.
+  MI->setInstrDescriptor(TII.get(NewOpc));
+  MI->getOperand(1).ChangeToImmediate(0);
+  return foldMemoryOperand(MI, 0, FrameIndex);
+}
+
MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned OpNum,
MachineInstr *LoadMI) const {
// Check switch flag
@@ -1160,6 +1185,31 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned OpNu
return foldMemoryOperand(MI, OpNum, MOs);
}
+/// foldMemoryOperand - Multi-use variant of the LoadMI fold. UseOps holds the
+/// operand indices of MI that use the register being folded. A single use is
+/// forwarded, together with LoadMI, to the one-operand overload. The only
+/// two-use shape handled is TESTxxrr r, r (uses at operands 0 and 1), which
+/// is rewritten in place to CMPxxri r, 0 so that operand 0 can then be
+/// folded. Returns NULL when NoFusing is set or the instruction does not
+/// match that shape.
+MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
+                                           SmallVectorImpl<unsigned> &UseOps,
+                                                 MachineInstr *LoadMI) const {
+  // Check switch flag
+  if (NoFusing) return NULL;
+
+  if (UseOps.size() == 1)
+    return foldMemoryOperand(MI, UseOps[0], LoadMI);
+  // Exactly two uses, at operands 0 and 1, are required. Reject as soon as
+  // either index differs (the test must be '||', not '&&').
+  if (UseOps.size() != 2 || UseOps[0] != 0 || UseOps[1] != 1)
+    return NULL;
+
+  unsigned NewOpc = 0;
+  switch (MI->getOpcode()) {
+  default: return NULL;
+  case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
+  case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
+  case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
+  case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+  }
+  // Change to CMPXXri r, 0 first.
+  // NOTE(review): MI is modified in place before the fold is attempted; if
+  // the fold below returns NULL, MI stays as CMPxxri r, 0. Confirm callers
+  // tolerate operand 1 having become an immediate.
+  MI->setInstrDescriptor(TII.get(NewOpc));
+  MI->getOperand(1).ChangeToImmediate(0);
+  return foldMemoryOperand(MI, 0, LoadMI);
+}
+
+
unsigned X86RegisterInfo::getOpcodeAfterMemoryFold(unsigned Opc,
unsigned OpNum) const {
// Check switch flag
@@ -1270,7 +1320,30 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
MachineOperand &MO = ImpOps[i];
MIB.addReg(MO.getReg(), MO.isDef(), true, MO.isKill(), MO.isDead());
}
- NewMIs.push_back(MIB);
+ // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
+ unsigned NewOpc = 0;
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP32ri:
+ case X86::CMP16ri:
+ case X86::CMP8ri: {
+ MachineOperand &MO0 = DataMI->getOperand(0);
+ MachineOperand &MO1 = DataMI->getOperand(1);
+ if (MO1.getImm() == 0) {
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
+ case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
+ }
+ DataMI->setInstrDescriptor(TII.get(NewOpc));
+ MO1.ChangeToRegister(MO0.getReg(), false);
+ }
+ }
+ }
+ NewMIs.push_back(DataMI);
// Emit the store instruction.
if (UnfoldStore) {
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 18e8b90..53f0844 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -141,6 +141,12 @@ public:
unsigned OpNum,
int FrameIndex) const;
+ /// foldMemoryOperand - Same as previous except it tries to fold instruction
+ /// with multiple uses of the same register.
+ MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ int FrameIndex) const;
+
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
@@ -148,6 +154,13 @@ public:
unsigned OpNum,
MachineInstr* LoadMI) const;
+ /// foldMemoryOperand - Same as the previous version except it tries to fold
+ /// an instruction with multiple uses of the same register; UseOps lists the
+ /// operand indices of those uses.
+ MachineInstr* foldMemoryOperand(MachineInstr* MI,
+ SmallVectorImpl<unsigned> &UseOps,
+ MachineInstr* LoadMI) const;
+
/// getOpcodeAfterMemoryFold - Returns the opcode of the would be new
/// instruction after load / store is folded into an instruction of the
/// specified opcode. It returns zero if the specified unfolding is not
diff --git a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
index 8cc235b..0ad6781 100644
--- a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
+++ b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5
+; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep "48(%esp)" | count 6
%struct..0anon = type { i32 }
%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
diff --git a/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll b/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
new file mode 100644
index 0000000..25fc6ac
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-30-TestLoadFolding.ll
@@ -0,0 +1,58 @@
+; RUN: llvm-as < %s | llc -march=x86 -stats |& \
+; RUN: grep {2 .*folded into instructions}
+; RUN: llvm-as < %s | llc -march=x86 | grep cmp | count 3
+
+ %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
+
+define fastcc i32 @perimeter(%struct.quad_struct* %tree, i32 %size) {
+entry:
+ %tree.idx7.val = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp8.i51 = icmp eq %struct.quad_struct* %tree.idx7.val, null ; <i1> [#uses=2]
+ br i1 %tmp8.i51, label %cond_next, label %cond_next.i52
+
+cond_next.i52: ; preds = %entry
+ ret i32 0
+
+cond_next: ; preds = %entry
+ %tmp59 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp70 = icmp eq i32 %tmp59, 2 ; <i1> [#uses=1]
+ br i1 %tmp70, label %cond_true.i35, label %bb80
+
+cond_true.i35: ; preds = %cond_next
+ %tmp14.i.i37 = load %struct.quad_struct** null, align 4 ; <%struct.quad_struct*> [#uses=1]
+ %tmp3.i160 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp4.i161 = icmp eq i32 %tmp3.i160, 2 ; <i1> [#uses=1]
+ br i1 %tmp4.i161, label %cond_true.i163, label %cond_false.i178
+
+cond_true.i163: ; preds = %cond_true.i35
+ %tmp7.i162 = sdiv i32 %size, 4 ; <i32> [#uses=2]
+ %tmp13.i168 = tail call fastcc i32 @sum_adjacent( %struct.quad_struct* null, i32 3, i32 2, i32 %tmp7.i162 ) ; <i32> [#uses=1]
+ %tmp18.i11.i170 = getelementptr %struct.quad_struct* %tmp14.i.i37, i32 0, i32 4 ; <%struct.quad_struct**> [#uses=1]
+ %tmp19.i12.i171 = load %struct.quad_struct** %tmp18.i11.i170, align 4 ; <%struct.quad_struct*> [#uses=1]
+ %tmp21.i173 = tail call fastcc i32 @sum_adjacent( %struct.quad_struct* %tmp19.i12.i171, i32 3, i32 2, i32 %tmp7.i162 ) ; <i32> [#uses=1]
+ %tmp22.i174 = add i32 %tmp21.i173, %tmp13.i168 ; <i32> [#uses=1]
+ br i1 false, label %cond_true.i141, label %cond_false.i156
+
+cond_false.i178: ; preds = %cond_true.i35
+ ret i32 0
+
+cond_true.i141: ; preds = %cond_true.i163
+ %tmp7.i140 = sdiv i32 %size, 4 ; <i32> [#uses=1]
+ %tmp21.i151 = tail call fastcc i32 @sum_adjacent( %struct.quad_struct* null, i32 3, i32 2, i32 %tmp7.i140 ) ; <i32> [#uses=0]
+ ret i32 0
+
+cond_false.i156: ; preds = %cond_true.i163
+ %tmp22.i44 = add i32 0, %tmp22.i174 ; <i32> [#uses=0]
+ br i1 %tmp8.i51, label %bb22.i, label %cond_next.i
+
+bb80: ; preds = %cond_next
+ ret i32 0
+
+cond_next.i: ; preds = %cond_false.i156
+ ret i32 0
+
+bb22.i: ; preds = %cond_false.i156
+ ret i32 0
+}
+
+declare fastcc i32 @sum_adjacent(%struct.quad_struct*, i32, i32, i32)