diff options
Diffstat (limited to 'test/Transforms/LoopUnroll')
-rw-r--r-- | test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll | 99 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/full-unroll-heuristics.ll | 62 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/partial-unroll-optsize.ll | 19 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/runtime-loop.ll | 8 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/runtime-loop1.ll | 2 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/runtime-loop2.ll | 2 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/tripcount-overflow.ll | 29 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll | 42 | ||||
-rw-r--r-- | test/Transforms/LoopUnroll/unroll-pragmas.ll | 26 |
9 files changed, 236 insertions, 53 deletions
diff --git a/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll new file mode 100644 index 0000000..7a50fc0 --- /dev/null +++ b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll @@ -0,0 +1,99 @@ +; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s +define void @unroll_opt_for_size() nounwind optsize { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: @unroll_opt_for_size +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp + +define void @unroll_default() nounwind { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: @unroll_default +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; 
CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp + diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics.ll new file mode 100644 index 0000000..a1bb4c5 --- /dev/null +++ b/test/Transforms/LoopUnroll/full-unroll-heuristics.ll @@ -0,0 +1,62 @@ +; In this test we check how heuristics for complete unrolling work. We have +; three knobs: +; 1) -unroll-threshold +; 2) -unroll-absolute-threshold and +; 3) -unroll-percent-of-optimized-for-complete-unroll +; +; They control loop-unrolling according to the following rules: +; * If size of unrolled loop exceeds the absolute threshold, we don't unroll +; this loop under any circumstances. +; * If size of unrolled loop is below the '-unroll-threshold', then we'll +; consider this loop as a very small one, and completely unroll it. +; * If a loop size is between these two thresholds, we only completely unroll +; it if the estimated number of potentially optimized instructions is high (we +; specify the minimal percent of such instructions). + +; In this particular test-case, complete unrolling will allow later +; optimizations to remove ~55% of the instructions, the loop body size is 9, +; and unrolled size is 65. 
+ +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=10 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=30 | FileCheck %s -check-prefix=TEST1 +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=30 | FileCheck %s -check-prefix=TEST2 +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=80 | FileCheck %s -check-prefix=TEST3 +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-absolute-threshold=100 -unroll-threshold=100 -unroll-percent-of-optimized-for-complete-unroll=80 | FileCheck %s -check-prefix=TEST4 + +; If the absolute threshold is too low, or if we can't optimize away requested +; percent of instructions, we shouldn't unroll: +; TEST1: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv +; TEST3: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv + +; Otherwise, we should: +; TEST2-NOT: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv + +; Also, we should unroll if the 'unroll-threshold' is big enough: +; TEST4-NOT: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv + +; And check that we don't crash when we're not allowed to do any analysis. 
+; RUN: opt < %s -loop-unroll -unroll-max-iteration-count-to-analyze=0 -disable-output +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 + +define i32 @foo(i32* noalias nocapture readonly %src) { +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %inc, %loop ] + %r = phi i32 [ 0, %entry ], [ %add, %loop ] + %arrayidx = getelementptr inbounds i32* %src, i64 %iv + %src_element = load i32* %arrayidx, align 4 + %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv + %const_array_element = load i32* %array_const_idx, align 4 + %mul = mul nsw i32 %src_element, %const_array_element + %add = add nsw i32 %mul, %r + %inc = add nuw nsw i64 %iv, 1 + %exitcond86.i = icmp eq i64 %inc, 9 + br i1 %exitcond86.i, label %loop.end, label %loop + +loop.end: ; preds = %loop + %r.lcssa = phi i32 [ %r, %loop ] + ret i32 %r.lcssa +} diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll index 3179d55..a650317 100644 --- a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll +++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s ; Loop size = 3, when the function has the optsize attribute, the ; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled -; by 16 times because 3 * 16 < 50. +; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not). 
define void @unroll_opt_for_size() nounwind optsize { entry: br label %loop @@ -32,4 +32,21 @@ exit: ; CHECK-NEXT: add ; CHECK-NEXT: add ; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add ; CHECK-NEXT: icmp + diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll index 05d03f2..80571ec 100644 --- a/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/test/Transforms/LoopUnroll/runtime-loop.ll @@ -4,9 +4,7 @@ ; CHECK: %xtraiter = and i32 %n ; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 -; CHECK: %lcmp.overflow = icmp eq i32 %n, 0 -; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod -; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split +; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split ; CHECK: for.body.prol: ; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ] @@ -115,6 +113,6 @@ for.end: ; preds = %for.cond.for.end_cr ret i16 %res.0.lcssa } -; CHECK: !0 = metadata !{metadata !0, metadata !1} -; CHECK: !1 = metadata !{metadata !"llvm.loop.unroll.disable"} +; CHECK: !0 = distinct !{!0, !1} +; CHECK: !1 = !{!"llvm.loop.unroll.disable"} diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll index 38b4f32..5ff75e3 100644 --- a/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -3,7 +3,7 @@ ; This tests that setting the unroll count works ; CHECK: for.body.prol: -; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split +; CHECK: br label %for.body.preheader.split ; CHECK: for.body: ; CHECK: br i1 %exitcond.1, label 
%for.end.loopexit.unr-lcssa, label %for.body ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll index 7205c68..176362a 100644 --- a/test/Transforms/LoopUnroll/runtime-loop2.ll +++ b/test/Transforms/LoopUnroll/runtime-loop2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 -unroll-runtime -unroll-count=8 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s ; Choose a smaller, power-of-two, unroll count if the loop is too large. ; This test makes sure we're not unrolling 'odd' counts diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll index d593685..052077c 100644 --- a/test/Transforms/LoopUnroll/tripcount-overflow.ll +++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll @@ -1,19 +1,28 @@ ; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -; When prologue is fully unrolled, the branch on its end is unconditional. -; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow, -; like in this example, where it comes from an argument. -; -; This test is based on an example from here: -; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out -; +; This test case documents how runtime loop unrolling handles the case +; when the backedge-count is -1. + +; If %N, the backedge-taken count, is -1 then %0 unsigned-overflows +; and is 0. %xtraiter too is 0, signifying that the total trip-count +; is divisible by 2. The prologue then branches to the unrolled loop +; and executes the 2^32 iterations there, in groups of 2. 
+ + +; CHECK: entry: +; CHECK-NEXT: %0 = add i32 %N, 1 +; CHECK-NEXT: %xtraiter = and i32 %0, 1 +; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; CHECK-NEXT: br i1 %lcmp.mod, label %while.body.prol, label %entry.split + ; CHECK: while.body.prol: -; CHECK: br i1 +; CHECK: br label %entry.split + ; CHECK: entry.split: ; Function Attrs: nounwind readnone ssp uwtable -define i32 @foo(i32 %N) #0 { +define i32 @foo(i32 %N) { entry: br label %while.body @@ -26,5 +35,3 @@ while.body: ; preds = %while.body, %entry while.end: ; preds = %while.body ret i32 %i } - -attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll index db18f25..4f934a6 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll @@ -30,10 +30,10 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4} -!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} -!3 = metadata !{metadata !"llvm.loop.unroll.count", i32 4} -!4 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8} +!1 = !{!1, !2, !3, !4} +!2 = !{!"llvm.loop.vectorize.enable", i1 true} +!3 = !{!"llvm.loop.unroll.count", i32 4} +!4 = !{!"llvm.loop.vectorize.width", i32 8} ; #pragma clang loop unroll(full) ; @@ -63,8 +63,8 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } -!5 = metadata !{metadata !5, metadata !6} -!6 = metadata !{metadata !"llvm.loop.unroll.full"} +!5 = !{!5, !6} +!6 = !{!"llvm.loop.unroll.full"} ; #pragma clang loop unroll(disable) ; @@ -89,8 +89,8 @@ for.body: ; preds = 
%for.body, %entry for.end: ; preds = %for.body ret void } -!7 = metadata !{metadata !7, metadata !8} -!8 = metadata !{metadata !"llvm.loop.unroll.disable"} +!7 = !{!7, !8} +!8 = !{!"llvm.loop.unroll.disable"} ; This function contains two loops which share the same llvm.loop metadata node ; with an llvm.loop.unroll.count 2 hint. Both loops should be unrolled. This @@ -134,16 +134,16 @@ for.body3.1: ; preds = %for.body3.1.prehead for.inc5.1: ; preds = %for.body3.1 ret void } -!9 = metadata !{metadata !9, metadata !10} -!10 = metadata !{metadata !"llvm.loop.unroll.count", i32 2} - - -; CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[VEC_ENABLE:.*]], metadata ![[WIDTH_8:.*]], metadata ![[UNROLL_DISABLE:.*]]} -; CHECK: ![[VEC_ENABLE]] = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} -; CHECK: ![[WIDTH_8]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 8} -; CHECK: ![[UNROLL_DISABLE]] = metadata !{metadata !"llvm.loop.unroll.disable"} -; CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_FULL:.*]]} -; CHECK: ![[UNROLL_FULL]] = metadata !{metadata !"llvm.loop.unroll.full"} -; CHECK: ![[LOOP_3]] = metadata !{metadata ![[LOOP_3]], metadata ![[UNROLL_DISABLE:.*]]} -; CHECK: ![[LOOP_4]] = metadata !{metadata ![[LOOP_4]], metadata ![[UNROLL_DISABLE:.*]]} -; CHECK: ![[LOOP_5]] = metadata !{metadata ![[LOOP_5]], metadata ![[UNROLL_DISABLE:.*]]} +!9 = !{!9, !10} +!10 = !{!"llvm.loop.unroll.count", i32 2} + + +; CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[VEC_ENABLE:.*]], ![[WIDTH_8:.*]], ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} +; CHECK: ![[WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8} +; CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"} +; CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[UNROLL_FULL:.*]]} +; CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"} +; CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[UNROLL_DISABLE:.*]]} +; CHECK: 
![[LOOP_4]] = distinct !{![[LOOP_4]], ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]]} diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll index 1ca249d..5831557 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -loop-unroll -S | FileCheck %s -; RUN: opt < %s -loop-unroll -loop-unroll -S | FileCheck %s +; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s +; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s ; ; Run loop unrolling twice to verify that loop unrolling metadata is properly ; removed and further unrolling is disabled after the pass is run once. @@ -54,8 +54,8 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!1 = metadata !{metadata !1, metadata !2} -!2 = metadata !{metadata !"llvm.loop.unroll.disable"} +!1 = !{!1, !2} +!2 = !{!"llvm.loop.unroll.disable"} ; loop64 has a high enough count that it should *not* be unrolled by ; the default unrolling heuristic. It serves as the control for the @@ -105,8 +105,8 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!3 = metadata !{metadata !3, metadata !4} -!4 = metadata !{metadata !"llvm.loop.unroll.full"} +!3 = !{!3, !4} +!4 = !{!"llvm.loop.unroll.full"} ; #pragma clang loop unroll_count(4) ; Loop should be unrolled 4 times. 
@@ -135,8 +135,8 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!5 = metadata !{metadata !5, metadata !6} -!6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4} +!5 = !{!5, !6} +!6 = !{!"llvm.loop.unroll.count", i32 4} ; #pragma clang loop unroll(full) ; Full unrolling is requested, but loop has a dynamic trip count so @@ -165,7 +165,7 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } -!8 = metadata !{metadata !8, metadata !4} +!8 = !{!8, !4} ; #pragma clang loop unroll_count(4) ; Loop has a dynamic trip count. Unrolling should occur, but no @@ -202,7 +202,7 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } -!9 = metadata !{metadata !9, metadata !6} +!9 = !{!9, !6} ; #pragma clang loop unroll_count(1) ; Loop should not be unrolled @@ -228,8 +228,8 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!10 = metadata !{metadata !10, metadata !11} -!11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1} +!10 = !{!10, !11} +!11 = !{!"llvm.loop.unroll.count", i32 1} ; #pragma clang loop unroll(full) ; Loop has very high loop count (1 million) and full unrolling was requested. @@ -256,4 +256,4 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } -!12 = metadata !{metadata !12, metadata !4} +!12 = !{!12, !4} |