aboutsummaryrefslogtreecommitdiffstats
path: root/test/Transforms/LoopUnroll
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-12-01 14:51:49 -0800
committerStephen Hines <srhines@google.com>2014-12-02 16:08:10 -0800
commit37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch)
tree8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /test/Transforms/LoopUnroll
parentd2327b22152ced7bc46dc629fc908959e8a52d03 (diff)
downloadexternal_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.zip
external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.gz
external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.bz2
Update aosp/master LLVM for rebase to r222494.
Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
Diffstat (limited to 'test/Transforms/LoopUnroll')
-rw-r--r--test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll3
-rw-r--r--test/Transforms/LoopUnroll/ephemeral.ll44
-rw-r--r--test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll133
-rw-r--r--test/Transforms/LoopUnroll/nsw-tripcount.ll32
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop.ll27
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop1.ll8
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop2.ll3
-rw-r--r--test/Transforms/LoopUnroll/scevunroll.ll15
-rw-r--r--test/Transforms/LoopUnroll/tripcount-overflow.ll30
-rw-r--r--test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll149
-rw-r--r--test/Transforms/LoopUnroll/unroll-pragmas.ll46
-rw-r--r--test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll35
12 files changed, 461 insertions, 64 deletions
diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
index 17c91e5..aae79cb 100644
--- a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
+++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
@@ -41,8 +41,7 @@ for.end: ; preds = %for.body, %entry
}
; CHECK-LABEL: @test
-; CHECK: unr.cmp{{.*}}:
-; CHECK: for.body.unr{{.*}}:
+; CHECK: for.body.prol{{.*}}:
; CHECK: for.body:
; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
diff --git a/test/Transforms/LoopUnroll/ephemeral.ll b/test/Transforms/LoopUnroll/ephemeral.ll
new file mode 100644
index 0000000..9d40613
--- /dev/null
+++ b/test/Transforms/LoopUnroll/ephemeral.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s
+
+; Make sure this loop is completely unrolled...
+; CHECK-LABEL: @test1
+; CHECK: for.body:
+; CHECK-NOT: for.end:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+
+ ; This loop will be completely unrolled, even with these extra instructions,
+ ; but only because they're ephemeral (and, thus, free).
+ %1 = add nsw i32 %0, 2
+ %2 = add nsw i32 %1, 4
+ %3 = add nsw i32 %2, 4
+ %4 = add nsw i32 %3, 4
+ %5 = add nsw i32 %4, 4
+ %6 = add nsw i32 %5, 4
+ %7 = add nsw i32 %6, 4
+ %8 = add nsw i32 %7, 4
+ %9 = add nsw i32 %8, 4
+ %10 = add nsw i32 %9, 4
+ %ca = icmp sgt i32 %10, -7
+ call void @llvm.assume(i1 %ca)
+
+ %add = add nsw i32 %0, %sum.01
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 5
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll
new file mode 100644
index 0000000..dcb5d1c
--- /dev/null
+++ b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll
@@ -0,0 +1,133 @@
+; REQUIRES: asserts
+; RUN: opt < %s -disable-output -stats -loop-unroll -info-output-file - | FileCheck %s --check-prefix=STATS
+; STATS: 1 loop-unroll - Number of loops unrolled (completely or otherwise)
+; Test that llvm.annotation intrinsic do not count against the loop body size
+; and prevent unrolling.
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+@B = common global i32 0, align 4
+
+define void @foo(i32* noalias %A, i32 %B, i32 %C) {
+entry:
+ br label %for.body
+
+; A loop that has a small loop body (except for the annotations) that should be
+; unrolled with the default heuristic. Make sure the extra annotations do not
+; prevent unrolling
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ ; The real loop.
+ %mul = mul nsw i32 %B, %C
+ %arrayidx = getelementptr inbounds i32* %A, i32 %i.01
+ store i32 %mul, i32* %arrayidx, align 4
+ %inc = add nsw i32 %i.01, 1
+ %exitcond = icmp ne i32 %inc, 4
+
+ ; A bunch of annotations
+ %annot.0 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.1 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.2 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.3 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.4 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.5 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.6 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.7 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.8 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.9 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.10 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.11 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.12 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.13 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.14 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.15 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.16 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.17 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.18 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.19 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.20 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.21 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.22 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.23 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.24 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.25 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.26 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.27 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.28 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.29 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.30 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.31 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.32 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.33 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.34 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.35 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.36 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.37 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.38 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.39 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.40 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.41 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.42 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.43 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.44 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.45 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.46 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.47 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.48 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.49 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.50 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.51 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.52 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.53 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.54 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.55 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.56 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.57 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.58 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.59 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.60 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.61 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.62 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.63 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.64 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.65 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.66 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.67 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.68 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.69 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.70 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.71 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.72 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.73 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.74 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.75 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.76 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.77 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.78 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.79 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.80 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.81 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.82 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.83 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.84 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.85 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.86 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.87 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.88 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.89 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.90 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.91 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.92 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.93 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.94 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.95 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.96 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.97 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.98 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ %annot.99 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)
diff --git a/test/Transforms/LoopUnroll/nsw-tripcount.ll b/test/Transforms/LoopUnroll/nsw-tripcount.ll
new file mode 100644
index 0000000..98cab32
--- /dev/null
+++ b/test/Transforms/LoopUnroll/nsw-tripcount.ll
@@ -0,0 +1,32 @@
+; RUN: opt -loop-unroll -S %s | FileCheck %s
+
+; extern void f(int);
+; void test1(int v) {
+; for (int i=v; i<=v+1; ++i)
+; f(i);
+; }
+;
+; We can use the nsw information to see that the tripcount will be 2, so the
+; loop should be unrolled as this is always beneficial
+
+declare void @f(i32)
+
+; CHECK-LABEL: @test1
+define void @test1(i32 %v) {
+entry:
+ %add = add nsw i32 %v, 1
+ br label %for.body
+
+for.body:
+ %i.04 = phi i32 [ %v, %entry ], [ %inc, %for.body ]
+ tail call void @f(i32 %i.04)
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp slt i32 %i.04, %add
+ br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: call void @f
+; CHECK-NOT: br i1
+; CHECK: call void @f
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index a14087d..05d03f2 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -3,15 +3,16 @@
; Tests for unrolling loops with run-time trip counts
; CHECK: %xtraiter = and i32 %n
-; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
-; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
-; CHECK: br i1 %lcmp.or, label %unr.cmp
+; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
+; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
+; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split
-; CHECK: unr.cmp{{.*}}:
-; CHECK: for.body.unr{{.*}}:
-; CHECK: for.body:
-; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK: for.body.prol:
+; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]
+; CHECK: %prol.iter.sub = sub i32 %prol.iter, 1
+; CHECK: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
+; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split, !llvm.loop !0
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
@@ -39,7 +40,7 @@ for.end: ; preds = %for.body, %entry
; even if the -unroll-runtime is specified
; CHECK: for.body:
-; CHECK-NOT: for.body.unr:
+; CHECK-NOT: for.body.prol:
define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
entry:
@@ -85,8 +86,8 @@ cond_true138:
; Test run-time unrolling for a loop that counts down by -2.
-; CHECK: for.body.unr:
-; CHECK: br i1 %cmp.7, label %for.cond.for.end_crit_edge{{.*}}, label %for.body
+; CHECK: for.body.prol:
+; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
@@ -113,3 +114,7 @@ for.end: ; preds = %for.cond.for.end_cr
%res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
ret i16 %res.0.lcssa
}
+
+; CHECK: !0 = metadata !{metadata !0, metadata !1}
+; CHECK: !1 = metadata !{metadata !"llvm.loop.unroll.disable"}
+
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
index ad99b8c..38b4f32 100644
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=4 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s
; This tests that setting the unroll count works
-; CHECK: unr.cmp:
-; CHECK: for.body.unr:
+; CHECK: for.body.prol:
+; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
; CHECK: for.body:
-; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll
index cbc7af5..7205c68 100644
--- a/test/Transforms/LoopUnroll/runtime-loop2.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -3,8 +3,7 @@
; Choose a smaller, power-of-two, unroll count if the loop is too large.
; This test makes sure we're not unrolling 'odd' counts
-; CHECK: unr.cmp:
-; CHECK: for.body.unr:
+; CHECK: for.body.prol:
; CHECK: for.body:
; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll
index c3086e8..20161d7 100644
--- a/test/Transforms/LoopUnroll/scevunroll.ll
+++ b/test/Transforms/LoopUnroll/scevunroll.ll
@@ -66,16 +66,13 @@ exit2:
; SCEV properly unrolls multi-exit loops.
;
-; SCEV cannot currently unroll this loop.
-; It should ideally detect a trip count of 5.
-; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops.
; CHECK-LABEL: @multiExit(
-; CHECKFIXME: getelementptr i32* %base, i32 10
-; CHECKFIXME-NEXT: load i32*
-; CHECKFIXME: br i1 false, label %l2.10, label %exit1
-; CHECKFIXME: l2.10:
-; CHECKFIXME-NOT: br
-; CHECKFIXME: ret i32
+; CHECK: getelementptr i32* %base, i32 10
+; CHECK-NEXT: load i32*
+; CHECK: br i1 false, label %l2.10, label %exit1
+; CHECK: l2.10:
+; CHECK-NOT: br
+; CHECK: ret i32
define i32 @multiExit(i32* %base) nounwind {
entry:
br label %l1
diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll
new file mode 100644
index 0000000..d593685
--- /dev/null
+++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; When prologue is fully unrolled, the branch on its end is unconditional.
+; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow,
+; like in this example, where it comes from an argument.
+;
+; This test is based on an example from here:
+; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
+;
+; CHECK: while.body.prol:
+; CHECK: br i1
+; CHECK: entry.split:
+
+; Function Attrs: nounwind readnone ssp uwtable
+define i32 @foo(i32 %N) #0 {
+entry:
+ br label %while.body
+
+while.body: ; preds = %while.body, %entry
+ %i = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ %cmp = icmp eq i32 %i, %N
+ %inc = add i32 %i, 1
+ br i1 %cmp, label %while.end, label %while.body
+
+while.end: ; preds = %while.body
+ ret i32 %i
+}
+
+attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
new file mode 100644
index 0000000..db18f25
--- /dev/null
+++ b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
@@ -0,0 +1,149 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+;
+; Verify that the unrolling pass removes existing unroll count metadata
+; and adds a disable unrolling node after unrolling is complete.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; #pragma clang loop vectorize(enable) unroll_count(4) vectorize_width(8)
+;
+; Unroll count metadata should be replaced with unroll(disable). Vectorize
+; metadata should be untouched.
+;
+; CHECK-LABEL: @unroll_count_4(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]]
+define void @unroll_count_4(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end: ; preds = %for.body
+ ret void
+}
+!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4}
+!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!3 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
+!4 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
+
+; #pragma clang loop unroll(full)
+;
+; An unroll disable metadata node is only added for the unroll count case.
+; In this case, the loop has a full unroll metadata but can't be fully unrolled
+; because the trip count is dynamic. The full unroll metadata should remain
+; after unrolling.
+;
+; CHECK-LABEL: @unroll_full(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]]
+define void @unroll_full(i32* nocapture %a, i32 %b) {
+entry:
+ %cmp3 = icmp sgt i32 %b, 0
+ br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !5
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %b
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+!5 = metadata !{metadata !5, metadata !6}
+!6 = metadata !{metadata !"llvm.loop.unroll.full"}
+
+; #pragma clang loop unroll(disable)
+;
+; Unroll metadata should not change.
+;
+; CHECK-LABEL: @unroll_disable(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]]
+define void @unroll_disable(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
+
+for.end: ; preds = %for.body
+ ret void
+}
+!7 = metadata !{metadata !7, metadata !8}
+!8 = metadata !{metadata !"llvm.loop.unroll.disable"}
+
+; This function contains two loops which share the same llvm.loop metadata node
+; with an llvm.loop.unroll.count 2 hint. Both loops should be unrolled. This
+; verifies that adding disable metadata to a loop after unrolling doesn't affect
+; other loops which previously shared the same llvm.loop metadata.
+;
+; CHECK-LABEL: @shared_metadata(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]]
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]]
+define void @shared_metadata(i32* nocapture %List) #0 {
+entry:
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx = getelementptr inbounds i32* %List, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add4 = add nsw i32 %0, 10
+ store i32 %add4, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 4
+ br i1 %exitcond, label %for.body3.1.preheader, label %for.body3, !llvm.loop !9
+
+for.body3.1.preheader: ; preds = %for.body3
+ br label %for.body3.1
+
+for.body3.1: ; preds = %for.body3.1.preheader, %for.body3.1
+ %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ]
+ %1 = add nsw i64 %indvars.iv.1, 1
+ %arrayidx.1 = getelementptr inbounds i32* %List, i64 %1
+ %2 = load i32* %arrayidx.1, align 4
+ %add4.1 = add nsw i32 %2, 10
+ store i32 %add4.1, i32* %arrayidx.1, align 4
+ %exitcond.1 = icmp eq i64 %1, 4
+ br i1 %exitcond.1, label %for.inc5.1, label %for.body3.1, !llvm.loop !9
+
+for.inc5.1: ; preds = %for.body3.1
+ ret void
+}
+!9 = metadata !{metadata !9, metadata !10}
+!10 = metadata !{metadata !"llvm.loop.unroll.count", i32 2}
+
+
+; CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[VEC_ENABLE:.*]], metadata ![[WIDTH_8:.*]], metadata ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[VEC_ENABLE]] = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+; CHECK: ![[WIDTH_8]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
+; CHECK: ![[UNROLL_DISABLE]] = metadata !{metadata !"llvm.loop.unroll.disable"}
+; CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_FULL:.*]]}
+; CHECK: ![[UNROLL_FULL]] = metadata !{metadata !"llvm.loop.unroll.full"}
+; CHECK: ![[LOOP_3]] = metadata !{metadata ![[LOOP_3]], metadata ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_4]] = metadata !{metadata ![[LOOP_4]], metadata ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_5]] = metadata !{metadata ![[LOOP_5]], metadata ![[UNROLL_DISABLE:.*]]}
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll
index 5e45a2d..1ca249d 100644
--- a/test/Transforms/LoopUnroll/unroll-pragmas.ll
+++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -1,4 +1,8 @@
; RUN: opt < %s -loop-unroll -S | FileCheck %s
+; RUN: opt < %s -loop-unroll -loop-unroll -S | FileCheck %s
+;
+; Run loop unrolling twice to verify that loop unrolling metadata is properly
+; removed and further unrolling is disabled after the pass is run once.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -51,11 +55,11 @@ for.end: ; preds = %for.body
ret void
}
!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false}
+!2 = metadata !{metadata !"llvm.loop.unroll.disable"}
; loop64 has a high enough count that it should *not* be unrolled by
; the default unrolling heuristic. It serves as the control for the
-; unroll(enable) pragma test loop64_with_.* tests below.
+; unroll(full) pragma test loop64_with_.* tests below.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
@@ -79,7 +83,7 @@ for.end: ; preds = %for.body
ret void
}
-; #pragma clang loop unroll(enable)
+; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_enable(
@@ -102,7 +106,7 @@ for.end: ; preds = %for.body
ret void
}
!3 = metadata !{metadata !3, metadata !4}
-!4 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true}
+!4 = metadata !{metadata !"llvm.loop.unroll.full"}
; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
@@ -134,37 +138,7 @@ for.end: ; preds = %for.body
!5 = metadata !{metadata !5, metadata !6}
!6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
-
-; #pragma clang loop unroll_count(enable) unroll_count(4)
-; Loop should be unrolled 4 times.
-;
-; CHECK-LABEL: @loop64_with_enable_and_count4(
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK-NOT: store i32
-; CHECK: br i1
-define void @loop64_with_enable_and_count4(i32* nocapture %a) {
-entry:
- br label %for.body
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
- %inc = add nsw i32 %0, 1
- store i32 %inc, i32* %arrayidx, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 64
- br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
-
-for.end: ; preds = %for.body
- ret void
-}
-!7 = metadata !{metadata !7, metadata !6, metadata !4}
-
-; #pragma clang loop unroll_count(enable)
+; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a dynamic trip count so
; no unrolling should occur.
;
@@ -257,7 +231,7 @@ for.end: ; preds = %for.body
!10 = metadata !{metadata !10, metadata !11}
!11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1}
-; #pragma clang loop unroll(enable)
+; #pragma clang loop unroll(full)
; Loop has very high loop count (1 million) and full unrolling was requested.
; Loop should unrolled up to the pragma threshold, but not completely.
;
diff --git a/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
new file mode 100644
index 0000000..adbf47d
--- /dev/null
+++ b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s
+; Crasher from PR20987.
+
+; CHECK: define void @update_loop_info_in_subloops
+; CHECK: entry:
+; CHECK: L:
+; CHECK: L.inner:
+; CHECK: L.inner.latch:
+; CHECK: L.latch:
+; CHECK: L.inner.1:
+; CHECK: L.inner.latch.1:
+; CHECK: L.latch.1:
+
+define void @update_loop_info_in_subloops() {
+entry:
+ br label %L
+
+L:
+ %0 = phi i64 [ 1, %entry ], [ %1, %L.latch ]
+ br label %L.inner
+
+L.inner:
+ br label %L.inner.latch
+
+L.inner.latch:
+ br i1 false, label %L.latch, label %L.inner
+
+L.latch:
+ %1 = add i64 %0, 1
+ %2 = icmp eq i64 %1, 3
+ br i1 %2, label %exit, label %L
+
+exit:
+ ret void
+}