aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2012-03-27 01:50:58 +0000
committerEvan Cheng <evan.cheng@apple.com>2012-03-27 01:50:58 +0000
commitd6c23557898b1fe9b21b28023f4133cab84a8b83 (patch)
treeed8c1ad3228568f97ea536b20704ac0cef257114
parentbca9c25dabbf50923319339ae780d6f916f56bfb (diff)
downloadexternal_llvm-d6c23557898b1fe9b21b28023f4133cab84a8b83.zip
external_llvm-d6c23557898b1fe9b21b28023f4133cab84a8b83.tar.gz
external_llvm-d6c23557898b1fe9b21b28023f4133cab84a8b83.tar.bz2
Post-ra LICM should take care not to hoist an instruction that would clobber a
register that's read by the preheader terminator. rdar://11095580 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153492 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/MachineLICM.cpp32
-rw-r--r--test/CodeGen/X86/2012-03-26-PostRALICMBug.ll59
2 files changed, 87 insertions, 4 deletions
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 428a9d9..dfc622d 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -478,6 +478,10 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
/// invariants out to the preheader.
void MachineLICM::HoistRegionPostRA() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
unsigned NumRegs = TRI->getNumRegs();
BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
@@ -514,25 +518,46 @@ void MachineLICM::HoistRegionPostRA() {
}
}
+ // Gather the registers read / clobbered by the terminator.
+ BitVector TermRegs(NumRegs);
+ MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+ if (TI != Preheader->end()) {
+ for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = TI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ TermRegs.set(*AS);
+ }
+ }
+
// Now evaluate whether the potential candidates qualify.
// 1. Check if the candidate defined register is defined by another
// instruction in the loop.
// 2. If the candidate is a load from stack slot (always true for now),
// check if the slot is stored anywhere in the loop.
+ // 3. Make sure candidate def should not clobber
+ // registers read by the terminator. Similarly its def should not be
+ // clobbered by the terminator.
for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
if (Candidates[i].FI != INT_MIN &&
StoredFIs.count(Candidates[i].FI))
continue;
- if (!PhysRegClobbers.test(Candidates[i].Def)) {
+ unsigned Def = Candidates[i].Def;
+ if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
bool Safe = true;
MachineInstr *MI = Candidates[i].MI;
for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (!MO.isReg() || MO.isDef() || !MO.getReg())
continue;
- if (PhysRegDefs.test(MO.getReg()) ||
- PhysRegClobbers.test(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegDefs.test(Reg) ||
+ PhysRegClobbers.test(Reg)) {
// If it's using a non-loop-invariant register, then it's obviously
// not safe to hoist.
Safe = false;
@@ -571,7 +596,6 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
/// dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader) return;
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
new file mode 100644
index 0000000..101ecca
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \
+; RUN: not grep {Number of machine instructions hoisted out of loops post regalloc}
+
+; rdar://11095580
+
+%struct.ref_s = type { %union.color_sample, i16, i16 }
+%union.color_sample = type { i64 }
+
+@table = external global [3891 x i64]
+
+declare i32 @foo()
+
+define i32 @zarray(%struct.ref_s* nocapture %op) nounwind ssp {
+entry:
+ %call = tail call i32 @foo()
+ %tmp = ashr i32 %call, 31
+ %0 = and i32 %tmp, 1396
+ %index9 = add i32 %0, 2397
+ indirectbr i8* undef, [label %return, label %if.end]
+
+if.end: ; preds = %entry
+ %size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2
+ %tmp6 = load i16* %size5, align 2
+ %tobool1 = icmp eq i16 %tmp6, 0
+ %1 = select i1 %tobool1, i32 1396, i32 -1910
+ %index10 = add i32 %index9, %1
+ indirectbr i8* undef, [label %return, label %while.body.lr.ph]
+
+while.body.lr.ph: ; preds = %if.end
+ %refs = bitcast %struct.ref_s* %op to %struct.ref_s**
+ %tmp9 = load %struct.ref_s** %refs, align 8
+ %tmp4 = zext i16 %tmp6 to i64
+ %index13 = add i32 %index10, 1658
+ %2 = sext i32 %index13 to i64
+ %3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2
+ %blockaddress14 = load i64* %3, align 8
+ %4 = inttoptr i64 %blockaddress14 to i8*
+ indirectbr i8* %4, [label %while.body]
+
+while.body: ; preds = %while.body, %while.body.lr.ph
+ %index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ]
+ %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
+ %type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1
+ store i16 32, i16* %type_attrs, align 2
+ %indvar.next = add i64 %indvar, 1
+ %exitcond5 = icmp eq i64 %indvar.next, %tmp4
+ %tmp7 = select i1 %exitcond5, i32 1648, i32 0
+ %index15 = add i32 %index7, %tmp7
+ %tmp8 = select i1 %exitcond5, i64 13, i64 0
+ %5 = sext i32 %index15 to i64
+ %6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5
+ %blockaddress16 = load i64* %6, align 8
+ %7 = inttoptr i64 %blockaddress16 to i8*
+ indirectbr i8* %7, [label %return, label %while.body]
+
+return: ; preds = %while.body, %if.end, %entry
+ %retval.0 = phi i32 [ %call, %entry ], [ 0, %if.end ], [ 0, %while.body ]
+ ret i32 %retval.0
+}