diff options
-rw-r--r-- | include/llvm/Analysis/ScalarEvolutionExpander.h | 4 | ||||
-rw-r--r-- | lib/Analysis/ScalarEvolutionExpander.cpp | 41 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2 | ||||
-rw-r--r-- | test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll | 90 |
4 files changed, 136 insertions, 1 deletions
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index cbbe429..3f8f149 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -24,6 +24,10 @@ namespace llvm { class TargetLowering; + /// Return true if the given expression is safe to expand in the sense that + /// all materialized values are safe to speculate. + bool isSafeToExpand(const SCEV *S); + /// SCEVExpander - This class uses information about analyze scalars to /// rewrite expressions in canonical form. /// diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index e7fe672a..b77f8d6 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1700,3 +1700,44 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, } return NumElim; } + +namespace { +// Search for a SCEV subexpression that is not safe to expand. Any expression +// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely +// UDiv expressions. We don't know if the UDiv is derived from an IR divide +// instruction, but the important thing is that we prove the denominator is +// nonzero before expansion. +// +// IVUsers already checks that IV-derived expressions are safe. So this check is +// only needed when the expression includes some subexpression that is not IV +// derived. +// +// Currently, we only allow division by a nonzero constant here. If this is +// inadequate, we could easily allow division by SCEVUnknown by using +// ValueTracking to check isKnownNonZero(). 
+struct SCEVFindUnsafe { + bool IsUnsafe; + + SCEVFindUnsafe(): IsUnsafe(false) {} + + bool follow(const SCEV *S) { + const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S); + if (!D) + return true; + const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); + if (SC && !SC->getValue()->isZero()) + return true; + IsUnsafe = true; + return false; + } + bool isDone() const { return IsUnsafe; } +}; +} + +namespace llvm { +bool isSafeToExpand(const SCEV *S) { + SCEVFindUnsafe Search; + visitAll(S, Search); + return !Search.IsUnsafe; +} +} diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4ba969e..c0cb13d 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2836,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); - if (SE.isLoopInvariant(N, L)) { + if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. N = TransformForPostIncUse(Normalize, N, CI, 0, diff --git a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll new file mode 100644 index 0000000..a122208 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll @@ -0,0 +1,90 @@ +; RUN: opt -loop-reduce -S < %s | FileCheck %s +; +; PR11356: likely wrong code bug +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin" + +@g_66 = global [1 x i32] zeroinitializer, align 4 +@g_775 = global i32 0, align 4 +@g_752 = global i32 0, align 4 +@g_3 = global i32 0, align 4 + +; Ensure that %div.i.i.i.us is not hoisted. 
+; CHECK: @main +; CHECK: for.body.i.i.us: +; CHECK: %div.i.i.i.us +; CHECK: %cmp5.i.i.us +define i32 @main() nounwind uwtable ssp { +entry: + %l_2 = alloca [1 x i32], align 4 + %arrayidx = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 0 + store i32 0, i32* %arrayidx, align 4, !tbaa !0 + %tmp = load i32* @g_3, align 4, !tbaa !0 + %idxprom = sext i32 %tmp to i64 + %arrayidx1 = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 %idxprom + %tmp1 = load i32* %arrayidx1, align 4, !tbaa !0 + %conv.i.i = and i32 %tmp1, 65535 + %tobool.i.i.i = icmp ne i32 %tmp, 0 + br label %codeRepl + +codeRepl.loopexit.us-lcssa: ; preds = %for.body.i.i, %codeRepl5 + br label %codeRepl.loopexit + +codeRepl.loopexit: ; preds = %codeRepl.loopexit.us-lcssa.us, %codeRepl.loopexit.us-lcssa + br label %codeRepl + +codeRepl: ; preds = %codeRepl.loopexit, %entry + br i1 %tobool.i.i.i, label %codeRepl.split.us, label %codeRepl.codeRepl.split_crit_edge + +codeRepl.codeRepl.split_crit_edge: ; preds = %codeRepl + br label %codeRepl.split + +codeRepl.split.us: ; preds = %codeRepl + br label %for.cond.i.i.us + +for.cond.i.i.us: ; preds = %for.inc.i.i.us, %codeRepl.split.us + %tmp2 = phi i32 [ 0, %codeRepl.split.us ], [ %add.i.i.us, %for.inc.i.i.us ] + br label %codeRepl5.us + +for.inc.i.i.us: ; preds = %for.body.i.i.us + %add.i.i.us = add nsw i32 %tmp2, 1 + store i32 %add.i.i.us, i32* @g_752, align 4, !tbaa !0 + br label %for.cond.i.i.us + +for.body.i.i.us: ; preds = %codeRepl5.us + %div.i.i.i.us = udiv i32 1, %conv.i.i + %cmp5.i.i.us = icmp eq i32 %div.i.i.i.us, %tmp2 + br i1 %cmp5.i.i.us, label %codeRepl.loopexit.us-lcssa.us, label %for.inc.i.i.us + +codeRepl5.us: ; preds = %for.cond.i.i.us + br i1 true, label %codeRepl.loopexit.us-lcssa.us, label %for.body.i.i.us + +codeRepl.loopexit.us-lcssa.us: ; preds = %codeRepl5.us, %for.body.i.i.us + br label %codeRepl.loopexit + +codeRepl.split: ; preds = %codeRepl.codeRepl.split_crit_edge + br label %for.cond.i.i + +for.cond.i.i: ; preds = %for.inc.i.i, 
%codeRepl.split + %tmp3 = phi i32 [ 0, %codeRepl.split ], [ %add.i.i, %for.inc.i.i ] + br label %codeRepl5 + +codeRepl5: ; preds = %for.cond.i.i + br i1 true, label %codeRepl.loopexit.us-lcssa, label %for.body.i.i + +for.body.i.i: ; preds = %codeRepl5 + %cmp5.i.i = icmp eq i32 0, %tmp3 + br i1 %cmp5.i.i, label %codeRepl.loopexit.us-lcssa, label %for.inc.i.i + +for.inc.i.i: ; preds = %for.body.i.i + %add.i.i = add nsw i32 %tmp3, 1 + store i32 %add.i.i, i32* @g_752, align 4, !tbaa !0 + br label %for.cond.i.i + +func_4.exit: ; No predecessors! + ret i32 0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} |