aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Trick <atrick@apple.com>2012-07-13 23:33:10 +0000
committerAndrew Trick <atrick@apple.com>2012-07-13 23:33:10 +0000
commite08c32249fca32cd7b122024a4ca252fcb235694 (patch)
tree923f5912e0f49151d6ce7ff084b99bc6cec72c3e
parent31f61e8b221eee4f839687e9ec4df89a469a2652 (diff)
downloadexternal_llvm-e08c32249fca32cd7b122024a4ca252fcb235694.zip
external_llvm-e08c32249fca32cd7b122024a4ca252fcb235694.tar.gz
external_llvm-e08c32249fca32cd7b122024a4ca252fcb235694.tar.bz2
LSR Fix: check SCEV expression safety before expansion.
All SCEV expressions used by LSR formulae must be safe to expand. i.e. they may not contain UDiv unless we can prove nonzero denominator. Fixes PR11356: LSR hoists UDiv. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160205 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h4
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp41
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp2
-rw-r--r--test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll90
4 files changed, 136 insertions, 1 deletions
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index cbbe429..3f8f149 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -24,6 +24,10 @@
namespace llvm {
class TargetLowering;
+ /// Return true if the given expression is safe to expand in the sense that
+ /// all materialized values are safe to speculate.
+ bool isSafeToExpand(const SCEV *S);
+
/// SCEVExpander - This class uses information about analyze scalars to
/// rewrite expressions in canonical form.
///
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index e7fe672a..b77f8d6 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1700,3 +1700,44 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
}
return NumElim;
}
+
+namespace {
+// Search for a SCEV subexpression that is not safe to expand. Any expression
+// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
+// UDiv expressions. We don't know if the UDiv is derived from an IR divide
+// instruction, but the important thing is that we prove the denominator is
+// nonzero before expansion.
+//
+// IVUsers already checks that IV-derived expressions are safe. So this check is
+// only needed when the expression includes some subexpression that is not IV
+// derived.
+//
+// Currently, we only allow division by a nonzero constant here. If this is
+// inadequate, we could easily allow division by SCEVUnknown by using
+// ValueTracking to check isKnownNonZero().
+struct SCEVFindUnsafe {
+ bool IsUnsafe;
+
+ SCEVFindUnsafe(): IsUnsafe(false) {}
+
+ bool follow(const SCEV *S) {
+ const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
+ if (!D)
+ return true;
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (SC && !SC->getValue()->isZero())
+ return true;
+ IsUnsafe = true;
+ return false;
+ }
+ bool isDone() const { return IsUnsafe; }
+};
+}
+
+namespace llvm {
+bool isSafeToExpand(const SCEV *S) {
+ SCEVFindUnsafe Search;
+ visitAll(S, Search);
+ return !Search.IsUnsafe;
+}
+}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4ba969e..c0cb13d 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2836,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
new file mode 100644
index 0000000..a122208
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
@@ -0,0 +1,90 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; PR11356: likely wrong code bug
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+@g_66 = global [1 x i32] zeroinitializer, align 4
+@g_775 = global i32 0, align 4
+@g_752 = global i32 0, align 4
+@g_3 = global i32 0, align 4
+
+; Ensure that %div.i.i.us is not hoisted.
+; CHECK: @main
+; CHECK: for.body.i.i.us:
+; CHECK: %div.i.i.i.us
+; CHECK: %cmp5.i.i.us
+define i32 @main() nounwind uwtable ssp {
+entry:
+ %l_2 = alloca [1 x i32], align 4
+ %arrayidx = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 0
+ store i32 0, i32* %arrayidx, align 4, !tbaa !0
+ %tmp = load i32* @g_3, align 4, !tbaa !0
+ %idxprom = sext i32 %tmp to i64
+ %arrayidx1 = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 %idxprom
+ %tmp1 = load i32* %arrayidx1, align 4, !tbaa !0
+ %conv.i.i = and i32 %tmp1, 65535
+ %tobool.i.i.i = icmp ne i32 %tmp, 0
+ br label %codeRepl
+
+codeRepl.loopexit.us-lcssa: ; preds = %for.body.i.i, %codeRepl5
+ br label %codeRepl.loopexit
+
+codeRepl.loopexit: ; preds = %codeRepl.loopexit.us-lcssa.us, %codeRepl.loopexit.us-lcssa
+ br label %codeRepl
+
+codeRepl: ; preds = %codeRepl.loopexit, %entry
+ br i1 %tobool.i.i.i, label %codeRepl.split.us, label %codeRepl.codeRepl.split_crit_edge
+
+codeRepl.codeRepl.split_crit_edge: ; preds = %codeRepl
+ br label %codeRepl.split
+
+codeRepl.split.us: ; preds = %codeRepl
+ br label %for.cond.i.i.us
+
+for.cond.i.i.us: ; preds = %for.inc.i.i.us, %codeRepl.split.us
+ %tmp2 = phi i32 [ 0, %codeRepl.split.us ], [ %add.i.i.us, %for.inc.i.i.us ]
+ br label %codeRepl5.us
+
+for.inc.i.i.us: ; preds = %for.body.i.i.us
+ %add.i.i.us = add nsw i32 %tmp2, 1
+ store i32 %add.i.i.us, i32* @g_752, align 4, !tbaa !0
+ br label %for.cond.i.i.us
+
+for.body.i.i.us: ; preds = %codeRepl5.us
+ %div.i.i.i.us = udiv i32 1, %conv.i.i
+ %cmp5.i.i.us = icmp eq i32 %div.i.i.i.us, %tmp2
+ br i1 %cmp5.i.i.us, label %codeRepl.loopexit.us-lcssa.us, label %for.inc.i.i.us
+
+codeRepl5.us: ; preds = %for.cond.i.i.us
+ br i1 true, label %codeRepl.loopexit.us-lcssa.us, label %for.body.i.i.us
+
+codeRepl.loopexit.us-lcssa.us: ; preds = %codeRepl5.us, %for.body.i.i.us
+ br label %codeRepl.loopexit
+
+codeRepl.split: ; preds = %codeRepl.codeRepl.split_crit_edge
+ br label %for.cond.i.i
+
+for.cond.i.i: ; preds = %for.inc.i.i, %codeRepl.split
+ %tmp3 = phi i32 [ 0, %codeRepl.split ], [ %add.i.i, %for.inc.i.i ]
+ br label %codeRepl5
+
+codeRepl5: ; preds = %for.cond.i.i
+ br i1 true, label %codeRepl.loopexit.us-lcssa, label %for.body.i.i
+
+for.body.i.i: ; preds = %codeRepl5
+ %cmp5.i.i = icmp eq i32 0, %tmp3
+ br i1 %cmp5.i.i, label %codeRepl.loopexit.us-lcssa, label %for.inc.i.i
+
+for.inc.i.i: ; preds = %for.body.i.i
+ %add.i.i = add nsw i32 %tmp3, 1
+ store i32 %add.i.i, i32* @g_752, align 4, !tbaa !0
+ br label %for.cond.i.i
+
+func_4.exit: ; No predecessors!
+ ret i32 0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}