aboutsummaryrefslogtreecommitdiffstats
path: root/test/Transforms/LoopUnroll
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms/LoopUnroll')
-rw-r--r--test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll99
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics.ll62
-rw-r--r--test/Transforms/LoopUnroll/partial-unroll-optsize.ll19
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop.ll8
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop1.ll2
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop2.ll2
-rw-r--r--test/Transforms/LoopUnroll/tripcount-overflow.ll29
-rw-r--r--test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll42
-rw-r--r--test/Transforms/LoopUnroll/unroll-pragmas.ll26
9 files changed, 236 insertions, 53 deletions
diff --git a/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
new file mode 100644
index 0000000..7a50fc0
--- /dev/null
+++ b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
@@ -0,0 +1,99 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s
+define void @unroll_opt_for_size() nounwind optsize {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add i32 %iv, 1
+ %exitcnd = icmp uge i32 %inc, 1024
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; CHECK-LABEL: @unroll_opt_for_size
+; CHECK: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+
+define void @unroll_default() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add i32 %iv, 1
+ %exitcnd = icmp uge i32 %inc, 1024
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; CHECK-LABEL: @unroll_default
+; CHECK: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
new file mode 100644
index 0000000..a1bb4c5
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
@@ -0,0 +1,62 @@
+; In this test we check how heuristics for complete unrolling work. We have
+; three knobs:
+; 1) -unroll-threshold
+; 2) -unroll-absolute-threshold and
+; 3) -unroll-percent-of-optimized-for-complete-unroll
+;
+; They control loop-unrolling according to the following rules:
+; * If size of unrolled loop exceeds the absoulte threshold, we don't unroll
+; this loop under any circumstances.
+; * If size of unrolled loop is below the '-unroll-threshold', then we'll
+; consider this loop as a very small one, and completely unroll it.
+; * If a loop size is between these two tresholds, we only do complete unroll
+; it if estimated number of potentially optimized instructions is high (we
+; specify the minimal percent of such instructions).
+
+; In this particular test-case, complete unrolling will allow later
+; optimizations to remove ~55% of the instructions, the loop body size is 9,
+; and unrolled size is 65.
+
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=10 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=30 | FileCheck %s -check-prefix=TEST1
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=30 | FileCheck %s -check-prefix=TEST2
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=80 | FileCheck %s -check-prefix=TEST3
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=100 -unroll-percent-of-optimized-for-complete-unroll=80 | FileCheck %s -check-prefix=TEST4
+
+; If the absolute threshold is too low, or if we can't optimize away requested
+; percent of instructions, we shouldn't unroll:
+; TEST1: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv
+; TEST3: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv
+
+; Otherwise, we should:
+; TEST2-NOT: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv
+
+; Also, we should unroll if the 'unroll-threshold' is big enough:
+; TEST4-NOT: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv
+
+; And check that we don't crash when we're not allowed to do any analysis.
+; RUN: opt < %s -loop-unroll -unroll-max-iteration-count-to-analyze=0 -disable-output
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+
+define i32 @foo(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32* %src, i64 %iv
+ %src_element = load i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv
+ %const_array_element = load i32* %array_const_idx, align 4
+ %mul = mul nsw i32 %src_element, %const_array_element
+ %add = add nsw i32 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 9
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i32 [ %r, %loop ]
+ ret i32 %r.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
index 3179d55..a650317 100644
--- a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
+++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
; Loop size = 3, when the function has the optsize attribute, the
; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled
-; by 16 times because 3 * 16 < 50.
+; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not).
define void @unroll_opt_for_size() nounwind optsize {
entry:
br label %loop
@@ -32,4 +32,21 @@ exit:
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
; CHECK-NEXT: icmp
+
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index 05d03f2..80571ec 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -4,9 +4,7 @@
; CHECK: %xtraiter = and i32 %n
; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
-; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
-; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split
+; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
; CHECK: for.body.prol:
; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]
@@ -115,6 +113,6 @@ for.end: ; preds = %for.cond.for.end_cr
ret i16 %res.0.lcssa
}
-; CHECK: !0 = metadata !{metadata !0, metadata !1}
-; CHECK: !1 = metadata !{metadata !"llvm.loop.unroll.disable"}
+; CHECK: !0 = distinct !{!0, !1}
+; CHECK: !1 = !{!"llvm.loop.unroll.disable"}
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
index 38b4f32..5ff75e3 100644
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -3,7 +3,7 @@
; This tests that setting the unroll count works
; CHECK: for.body.prol:
-; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
+; CHECK: br label %for.body.preheader.split
; CHECK: for.body:
; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll
index 7205c68..176362a 100644
--- a/test/Transforms/LoopUnroll/runtime-loop2.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 -unroll-runtime -unroll-count=8 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s
; Choose a smaller, power-of-two, unroll count if the loop is too large.
; This test makes sure we're not unrolling 'odd' counts
diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll
index d593685..052077c 100644
--- a/test/Transforms/LoopUnroll/tripcount-overflow.ll
+++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll
@@ -1,19 +1,28 @@
; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-; When prologue is fully unrolled, the branch on its end is unconditional.
-; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow,
-; like in this example, where it comes from an argument.
-;
-; This test is based on an example from here:
-; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
-;
+; This test case documents how runtime loop unrolling handles the case
+; when the backedge-count is -1.
+
+; If %N, the backedge-taken count, is -1 then %0 unsigned-overflows
+; and is 0. %xtraiter too is 0, signifying that the total trip-count
+; is divisible by 2. The prologue then branches to the unrolled loop
+; and executes the 2^32 iterations there, in groups of 2.
+
+
+; CHECK: entry:
+; CHECK-NEXT: %0 = add i32 %N, 1
+; CHECK-NEXT: %xtraiter = and i32 %0, 1
+; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK-NEXT: br i1 %lcmp.mod, label %while.body.prol, label %entry.split
+
; CHECK: while.body.prol:
-; CHECK: br i1
+; CHECK: br label %entry.split
+
; CHECK: entry.split:
; Function Attrs: nounwind readnone ssp uwtable
-define i32 @foo(i32 %N) #0 {
+define i32 @foo(i32 %N) {
entry:
br label %while.body
@@ -26,5 +35,3 @@ while.body: ; preds = %while.body, %entry
while.end: ; preds = %while.body
ret i32 %i
}
-
-attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
index db18f25..4f934a6 100644
--- a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
+++ b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
@@ -30,10 +30,10 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
-!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4}
-!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
-!3 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
-!4 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
+!1 = !{!1, !2, !3, !4}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
+!3 = !{!"llvm.loop.unroll.count", i32 4}
+!4 = !{!"llvm.loop.vectorize.width", i32 8}
; #pragma clang loop unroll(full)
;
@@ -63,8 +63,8 @@ for.body: ; preds = %entry, %for.body
for.end: ; preds = %for.body, %entry
ret void
}
-!5 = metadata !{metadata !5, metadata !6}
-!6 = metadata !{metadata !"llvm.loop.unroll.full"}
+!5 = !{!5, !6}
+!6 = !{!"llvm.loop.unroll.full"}
; #pragma clang loop unroll(disable)
;
@@ -89,8 +89,8 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
-!7 = metadata !{metadata !7, metadata !8}
-!8 = metadata !{metadata !"llvm.loop.unroll.disable"}
+!7 = !{!7, !8}
+!8 = !{!"llvm.loop.unroll.disable"}
; This function contains two loops which share the same llvm.loop metadata node
; with an llvm.loop.unroll.count 2 hint. Both loops should be unrolled. This
@@ -134,16 +134,16 @@ for.body3.1: ; preds = %for.body3.1.prehead
for.inc5.1: ; preds = %for.body3.1
ret void
}
-!9 = metadata !{metadata !9, metadata !10}
-!10 = metadata !{metadata !"llvm.loop.unroll.count", i32 2}
-
-
-; CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[VEC_ENABLE:.*]], metadata ![[WIDTH_8:.*]], metadata ![[UNROLL_DISABLE:.*]]}
-; CHECK: ![[VEC_ENABLE]] = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
-; CHECK: ![[WIDTH_8]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
-; CHECK: ![[UNROLL_DISABLE]] = metadata !{metadata !"llvm.loop.unroll.disable"}
-; CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_FULL:.*]]}
-; CHECK: ![[UNROLL_FULL]] = metadata !{metadata !"llvm.loop.unroll.full"}
-; CHECK: ![[LOOP_3]] = metadata !{metadata ![[LOOP_3]], metadata ![[UNROLL_DISABLE:.*]]}
-; CHECK: ![[LOOP_4]] = metadata !{metadata ![[LOOP_4]], metadata ![[UNROLL_DISABLE:.*]]}
-; CHECK: ![[LOOP_5]] = metadata !{metadata ![[LOOP_5]], metadata ![[UNROLL_DISABLE:.*]]}
+!9 = !{!9, !10}
+!10 = !{!"llvm.loop.unroll.count", i32 2}
+
+
+; CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[VEC_ENABLE:.*]], ![[WIDTH_8:.*]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
+; CHECK: ![[WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}
+; CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[UNROLL_FULL:.*]]}
+; CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"}
+; CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]]}
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll
index 1ca249d..5831557 100644
--- a/test/Transforms/LoopUnroll/unroll-pragmas.ll
+++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-unroll -S | FileCheck %s
-; RUN: opt < %s -loop-unroll -loop-unroll -S | FileCheck %s
+; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
+; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
;
; Run loop unrolling twice to verify that loop unrolling metadata is properly
; removed and further unrolling is disabled after the pass is run once.
@@ -54,8 +54,8 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
-!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.loop.unroll.disable"}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.unroll.disable"}
; loop64 has a high enough count that it should *not* be unrolled by
; the default unrolling heuristic. It serves as the control for the
@@ -105,8 +105,8 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
-!3 = metadata !{metadata !3, metadata !4}
-!4 = metadata !{metadata !"llvm.loop.unroll.full"}
+!3 = !{!3, !4}
+!4 = !{!"llvm.loop.unroll.full"}
; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
@@ -135,8 +135,8 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
-!5 = metadata !{metadata !5, metadata !6}
-!6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
+!5 = !{!5, !6}
+!6 = !{!"llvm.loop.unroll.count", i32 4}
; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a dynamic trip count so
@@ -165,7 +165,7 @@ for.body: ; preds = %entry, %for.body
for.end: ; preds = %for.body, %entry
ret void
}
-!8 = metadata !{metadata !8, metadata !4}
+!8 = !{!8, !4}
; #pragma clang loop unroll_count(4)
; Loop has a dynamic trip count. Unrolling should occur, but no
@@ -202,7 +202,7 @@ for.body: ; preds = %entry, %for.body
for.end: ; preds = %for.body, %entry
ret void
}
-!9 = metadata !{metadata !9, metadata !6}
+!9 = !{!9, !6}
; #pragma clang loop unroll_count(1)
; Loop should not be unrolled
@@ -228,8 +228,8 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
-!10 = metadata !{metadata !10, metadata !11}
-!11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1}
+!10 = !{!10, !11}
+!11 = !{!"llvm.loop.unroll.count", i32 1}
; #pragma clang loop unroll(full)
; Loop has very high loop count (1 million) and full unrolling was requested.
@@ -256,4 +256,4 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
-!12 = metadata !{metadata !12, metadata !4}
+!12 = !{!12, !4}