diff options
author | Andrew Trick <atrick@apple.com> | 2012-07-13 23:33:10 +0000 |
---|---|---|
committer | Andrew Trick <atrick@apple.com> | 2012-07-13 23:33:10 +0000 |
commit | e08c32249fca32cd7b122024a4ca252fcb235694 (patch) | |
tree | 923f5912e0f49151d6ce7ff084b99bc6cec72c3e | |
parent | 31f61e8b221eee4f839687e9ec4df89a469a2652 (diff) | |
download | external_llvm-e08c32249fca32cd7b122024a4ca252fcb235694.zip external_llvm-e08c32249fca32cd7b122024a4ca252fcb235694.tar.gz external_llvm-e08c32249fca32cd7b122024a4ca252fcb235694.tar.bz2 |
LSR Fix: check SCEV expression safety before expansion.
All SCEV expressions used by LSR formulae must be safe to
expand. i.e. they may not contain UDiv unless we can prove nonzero
denominator.
Fixes PR11356: LSR hoists UDiv.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160205 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/Analysis/ScalarEvolutionExpander.h | 4 | ||||
-rw-r--r-- | lib/Analysis/ScalarEvolutionExpander.cpp | 41 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2 | ||||
-rw-r--r-- | test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll | 90 |
4 files changed, 136 insertions, 1 deletions
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index cbbe429..3f8f149 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -24,6 +24,10 @@ namespace llvm { class TargetLowering; + /// Return true if the given expression is safe to expand in the sense that + /// all materialized values are safe to speculate. + bool isSafeToExpand(const SCEV *S); + /// SCEVExpander - This class uses information about analyze scalars to /// rewrite expressions in canonical form. /// diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index e7fe672a..b77f8d6 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1700,3 +1700,44 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, } return NumElim; } + +namespace { +// Search for a SCEV subexpression that is not safe to expand. Any expression +// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely +// UDiv expressions. We don't know if the UDiv is derived from an IR divide +// instruction, but the important thing is that we prove the denominator is +// nonzero before expansion. +// +// IVUsers already checks that IV-derived expressions are safe. So this check is +// only needed when the expression includes some subexpression that is not IV +// derived. +// +// Currently, we only allow division by a nonzero constant here. If this is +// inadequate, we could easily allow division by SCEVUnknown by using +// ValueTracking to check isKnownNonZero(). +struct SCEVFindUnsafe { + bool IsUnsafe; + + SCEVFindUnsafe(): IsUnsafe(false) {} + + bool follow(const SCEV *S) { + const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S); + if (!D) + return true; + const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); + if (SC && !SC->getValue()->isZero()) + return true; + IsUnsafe = true; + return false; + } + bool isDone() const { return IsUnsafe; } +}; +} + +namespace llvm { +bool isSafeToExpand(const SCEV *S) { + SCEVFindUnsafe Search; + visitAll(S, Search); + return !Search.IsUnsafe; +} +} diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4ba969e..c0cb13d 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2836,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); - if (SE.isLoopInvariant(N, L)) { + if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. N = TransformForPostIncUse(Normalize, N, CI, 0, diff --git a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll new file mode 100644 index 0000000..a122208 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll @@ -0,0 +1,90 @@ +; RUN: opt -loop-reduce -S < %s | FileCheck %s +; +; PR11356: likely wrong code bug +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin" + +@g_66 = global [1 x i32] zeroinitializer, align 4 +@g_775 = global i32 0, align 4 +@g_752 = global i32 0, align 4 +@g_3 = global i32 0, align 4 + +; Ensure that %div.i.i.us is not hoisted. +; CHECK: @main +; CHECK: for.body.i.i.us: +; CHECK: %div.i.i.i.us +; CHECK: %cmp5.i.i.us +define i32 @main() nounwind uwtable ssp { +entry: + %l_2 = alloca [1 x i32], align 4 + %arrayidx = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 0 + store i32 0, i32* %arrayidx, align 4, !tbaa !0 + %tmp = load i32* @g_3, align 4, !tbaa !0 + %idxprom = sext i32 %tmp to i64 + %arrayidx1 = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 %idxprom + %tmp1 = load i32* %arrayidx1, align 4, !tbaa !0 + %conv.i.i = and i32 %tmp1, 65535 + %tobool.i.i.i = icmp ne i32 %tmp, 0 + br label %codeRepl + +codeRepl.loopexit.us-lcssa: ; preds = %for.body.i.i, %codeRepl5 + br label %codeRepl.loopexit + +codeRepl.loopexit: ; preds = %codeRepl.loopexit.us-lcssa.us, %codeRepl.loopexit.us-lcssa + br label %codeRepl + +codeRepl: ; preds = %codeRepl.loopexit, %entry + br i1 %tobool.i.i.i, label %codeRepl.split.us, label %codeRepl.codeRepl.split_crit_edge + +codeRepl.codeRepl.split_crit_edge: ; preds = %codeRepl + br label %codeRepl.split + +codeRepl.split.us: ; preds = %codeRepl + br label %for.cond.i.i.us + +for.cond.i.i.us: ; preds = %for.inc.i.i.us, %codeRepl.split.us + %tmp2 = phi i32 [ 0, %codeRepl.split.us ], [ %add.i.i.us, %for.inc.i.i.us ] + br label %codeRepl5.us + +for.inc.i.i.us: ; preds = %for.body.i.i.us + %add.i.i.us = add nsw i32 %tmp2, 1 + store i32 %add.i.i.us, i32* @g_752, align 4, !tbaa !0 + br label %for.cond.i.i.us + +for.body.i.i.us: ; preds = %codeRepl5.us + %div.i.i.i.us = udiv i32 1, %conv.i.i + %cmp5.i.i.us = icmp eq i32 %div.i.i.i.us, %tmp2 + br i1 %cmp5.i.i.us, label %codeRepl.loopexit.us-lcssa.us, label %for.inc.i.i.us + +codeRepl5.us: ; preds = %for.cond.i.i.us + br i1 true, label %codeRepl.loopexit.us-lcssa.us, label %for.body.i.i.us + +codeRepl.loopexit.us-lcssa.us: ; preds = %codeRepl5.us, %for.body.i.i.us + br label %codeRepl.loopexit + +codeRepl.split: ; preds = %codeRepl.codeRepl.split_crit_edge + br label %for.cond.i.i + +for.cond.i.i: ; preds = %for.inc.i.i, %codeRepl.split + %tmp3 = phi i32 [ 0, %codeRepl.split ], [ %add.i.i, %for.inc.i.i ] + br label %codeRepl5 + +codeRepl5: ; preds = %for.cond.i.i + br i1 true, label %codeRepl.loopexit.us-lcssa, label %for.body.i.i + +for.body.i.i: ; preds = %codeRepl5 + %cmp5.i.i = icmp eq i32 0, %tmp3 + br i1 %cmp5.i.i, label %codeRepl.loopexit.us-lcssa, label %for.inc.i.i + +for.inc.i.i: ; preds = %for.body.i.i + %add.i.i = add nsw i32 %tmp3, 1 + store i32 %add.i.i, i32* @g_752, align 4, !tbaa !0 + br label %for.cond.i.i + +func_4.exit: ; No predecessors! + ret i32 0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} |