aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andrew Trick <atrick@apple.com> 2013-07-12 22:08:48 +0000
committer Andrew Trick <atrick@apple.com> 2013-07-12 22:08:48 +0000
commit 16404cc817e8b8d3fbfbc9051394eb59d7c5ffb4 (patch)
tree c38db272b7095a3c70a84d9d8e5ca205b93c08e4
parent 807e6c71a8d13531ec359006e3b6526ae70064a8 (diff)
downloadexternal_llvm-16404cc817e8b8d3fbfbc9051394eb59d7c5ffb4.zip
external_llvm-16404cc817e8b8d3fbfbc9051394eb59d7c5ffb4.tar.gz
external_llvm-16404cc817e8b8d3fbfbc9051394eb59d7c5ffb4.tar.bz2
LFTR improvement to avoid truncation.
This is a reimplementation of the patch originally in r186107. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186215 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Scalar/IndVarSimplify.cpp 38
-rw-r--r-- test/Transforms/IndVarSimplify/lftr-extend-const.ll 44
2 files changed, 76 insertions, 6 deletions
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index f0ac637..d51e034 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1590,15 +1590,41 @@ LinearFunctionTestReplace(Loop *L,
<< " RHS:\t" << *ExitCnt << "\n"
<< " IVCount:\t" << *IVCount << "\n");
+ IRBuilder<> Builder(BI);
+
// LFTR can ignore IV overflow and truncate to the width of
// BECount. This avoids materializing the add(zext(add)) expression.
- IRBuilder<> Builder(BI);
- if (SE->getTypeSizeInBits(CmpIndVar->getType())
- > SE->getTypeSizeInBits(ExitCnt->getType())) {
- CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
- "lftr.wideiv");
+ unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
+ unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
+ if (CmpIndVarSize > ExitCntSize) {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+ const SCEV *ARStart = AR->getStart();
+ const SCEV *ARStep = AR->getStepRecurrence(*SE);
+ // For constant IVCount, avoid truncation.
+ if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
+ const APInt &Start = cast<SCEVConstant>(ARStart)->getValue()->getValue();
+ APInt Count = cast<SCEVConstant>(IVCount)->getValue()->getValue();
+ // Note that the post-inc value of BackedgeTakenCount may have overflowed
+ // above such that IVCount is now zero.
+ if (IVCount != BackedgeTakenCount && Count == 0) {
+ Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
+ ++Count;
+ }
+ else
+ Count = Count.zext(CmpIndVarSize);
+ APInt NewLimit;
+ if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
+ NewLimit = Start - Count;
+ else
+ NewLimit = Start + Count;
+ ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
+
+ DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
+ } else {
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+ "lftr.wideiv");
+ }
}
-
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
new file mode 100644
index 0000000..18e1507
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
@@ -0,0 +1,44 @@
+;RUN: opt -S %s -indvars | FileCheck %s
+
+; CHECK-LABEL: @foo
+; CHECK-NOT: %lftr.wideiv = trunc i32 %indvars.iv.next to i16
+; CHECK: %exitcond = icmp ne i32 %indvars.iv.next, 512
+define void @foo() #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i16 [ 0, %entry ], [ %inc, %for.body ]
+ %conv2 = sext i16 %i.01 to i32
+ call void @bar(i32 %conv2) #1
+ %inc = add i16 %i.01, 1
+ %cmp = icmp slt i16 %inc, 512
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; Check that post-incrementing the backedge taken count does not overflow.
+; CHECK-LABEL: @postinc
+; CHECK: icmp eq i32 %indvars.iv.next, 256
+define i32 @postinc() #0 {
+entry:
+ br label %do.body
+
+do.body: ; preds = %do.body, %entry
+ %first.0 = phi i8 [ 0, %entry ], [ %inc, %do.body ]
+ %conv = zext i8 %first.0 to i32
+ call void @bar(i32 %conv) #1
+ %inc = add i8 %first.0, 1
+ %cmp = icmp eq i8 %first.0, -1
+ br i1 %cmp, label %do.end, label %do.body
+
+do.end: ; preds = %do.body
+ ret i32 0
+}
+
+declare void @bar(i32)
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }