From 37ed9c199ca639565f6ce88105f9e39e898d82d0 Mon Sep 17 00:00:00 2001 From: Stephen Hines Date: Mon, 1 Dec 2014 14:51:49 -0800 Subject: Update aosp/master LLVM for rebase to r222494. Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d --- test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll | 3 +- test/Transforms/LoopUnroll/ephemeral.ll | 44 ++++++ .../LoopUnroll/ignore-annotation-intrinsic-cost.ll | 133 ++++++++++++++++++ test/Transforms/LoopUnroll/nsw-tripcount.ll | 32 +++++ test/Transforms/LoopUnroll/runtime-loop.ll | 27 ++-- test/Transforms/LoopUnroll/runtime-loop1.ll | 8 +- test/Transforms/LoopUnroll/runtime-loop2.ll | 3 +- test/Transforms/LoopUnroll/scevunroll.ll | 15 +-- test/Transforms/LoopUnroll/tripcount-overflow.ll | 30 +++++ .../LoopUnroll/unroll-pragmas-disabled.ll | 149 +++++++++++++++++++++ test/Transforms/LoopUnroll/unroll-pragmas.ll | 46 ++----- .../LoopUnroll/update-loop-info-in-subloops.ll | 35 +++++ 12 files changed, 461 insertions(+), 64 deletions(-) create mode 100644 test/Transforms/LoopUnroll/ephemeral.ll create mode 100644 test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll create mode 100644 test/Transforms/LoopUnroll/nsw-tripcount.ll create mode 100644 test/Transforms/LoopUnroll/tripcount-overflow.ll create mode 100644 test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll create mode 100644 test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll (limited to 'test/Transforms/LoopUnroll') diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll index 17c91e5..aae79cb 100644 --- a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll +++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll @@ -41,8 +41,7 @@ for.end: ; preds = %for.body, %entry } ; CHECK-LABEL: @test -; CHECK: unr.cmp{{.*}}: -; CHECK: for.body.unr{{.*}}: +; CHECK: for.body.prol{{.*}}: ; CHECK: for.body: ; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body diff --git a/test/Transforms/LoopUnroll/ephemeral.ll b/test/Transforms/LoopUnroll/ephemeral.ll new file mode 100644 index 0000000..9d40613 --- /dev/null +++ b/test/Transforms/LoopUnroll/ephemeral.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s + +; Make sure this loop is completely unrolled... +; CHECK-LABEL: @test1 +; CHECK: for.body: +; CHECK-NOT: for.end: + +define i32 @test1(i32* nocapture %a) nounwind uwtable readonly { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + + ; This loop will be completely unrolled, even with these extra instructions, + ; but only because they're ephemeral (and, thus, free). 
+ %1 = add nsw i32 %0, 2 + %2 = add nsw i32 %1, 4 + %3 = add nsw i32 %2, 4 + %4 = add nsw i32 %3, 4 + %5 = add nsw i32 %4, 4 + %6 = add nsw i32 %5, 4 + %7 = add nsw i32 %6, 4 + %8 = add nsw i32 %7, 4 + %9 = add nsw i32 %8, 4 + %10 = add nsw i32 %9, 4 + %ca = icmp sgt i32 %10, -7 + call void @llvm.assume(i1 %ca) + + %add = add nsw i32 %0, %sum.01 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 5 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 %add +} + +declare void @llvm.assume(i1) nounwind + diff --git a/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll new file mode 100644 index 0000000..dcb5d1c --- /dev/null +++ b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll @@ -0,0 +1,133 @@ +; REQUIRES: asserts +; RUN: opt < %s -disable-output -stats -loop-unroll -info-output-file - | FileCheck %s --check-prefix=STATS +; STATS: 1 loop-unroll - Number of loops unrolled (completely or otherwise) +; Test that llvm.annotation intrinsic do not count against the loop body size +; and prevent unrolling. +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +@B = common global i32 0, align 4 + +define void @foo(i32* noalias %A, i32 %B, i32 %C) { +entry: + br label %for.body + +; A loop that has a small loop body (except for the annotations) that should be +; unrolled with the default heuristic. Make sure the extra annotations do not +; prevent unrolling +for.body: ; preds = %entry, %for.body + %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + ; The real loop. + %mul = mul nsw i32 %B, %C + %arrayidx = getelementptr inbounds i32* %A, i32 %i.01 + store i32 %mul, i32* %arrayidx, align 4 + %inc = add nsw i32 %i.01, 1 + %exitcond = icmp ne i32 %inc, 4 + + ; A bunch of annotations + %annot.0 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.1 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.2 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.3 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.4 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.5 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.6 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.7 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.8 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.9 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.10 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.11 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.12 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.13 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.14 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.15 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.16 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.17 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.18 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, 
i8* null, i32 0) + %annot.19 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.20 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.21 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.22 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.23 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.24 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.25 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.26 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.27 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.28 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.29 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.30 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.31 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.32 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.33 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.34 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.35 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.36 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.37 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.38 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.39 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.40 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.41 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.42 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.43 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.44 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.45 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.46 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.47 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.48 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.49 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.50 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.51 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.52 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.53 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.54 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.55 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.56 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.57 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.58 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.59 = tail call i32 @llvm.annotation.i32(i32 
%i.01, i8* null, i8* null, i32 0) + %annot.60 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.61 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.62 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.63 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.64 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.65 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.66 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.67 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.68 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.69 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.70 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.71 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.72 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.73 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.74 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.75 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.76 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.77 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.78 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.79 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.80 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.81 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.82 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.83 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.84 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.85 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.86 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.87 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.88 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.89 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.90 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.91 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.92 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.93 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.94 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.95 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.96 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.97 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.98 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + %annot.99 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0) + br i1 %exitcond, label %for.body, 
label %for.end + +for.end: ; preds = %for.body + ret void +} + +declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) diff --git a/test/Transforms/LoopUnroll/nsw-tripcount.ll b/test/Transforms/LoopUnroll/nsw-tripcount.ll new file mode 100644 index 0000000..98cab32 --- /dev/null +++ b/test/Transforms/LoopUnroll/nsw-tripcount.ll @@ -0,0 +1,32 @@ +; RUN: opt -loop-unroll -S %s | FileCheck %s + +; extern void f(int); +; void test1(int v) { +; for (int i=v; i<=v+1; ++i) +; f(i); +; } +; +; We can use the nsw information to see that the tripcount will be 2, so the +; loop should be unrolled as this is always beneficial + +declare void @f(i32) + +; CHECK-LABEL: @test1 +define void @test1(i32 %v) { +entry: + %add = add nsw i32 %v, 1 + br label %for.body + +for.body: + %i.04 = phi i32 [ %v, %entry ], [ %inc, %for.body ] + tail call void @f(i32 %i.04) + %inc = add nsw i32 %i.04, 1 + %cmp = icmp slt i32 %i.04, %add + br i1 %cmp, label %for.body, label %for.end + +; CHECK: call void @f +; CHECK-NOT: br i1 +; CHECK: call void @f +for.end: + ret void +} diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll index a14087d..05d03f2 100644 --- a/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/test/Transforms/LoopUnroll/runtime-loop.ll @@ -3,15 +3,16 @@ ; Tests for unrolling loops with run-time trip counts ; CHECK: %xtraiter = and i32 %n -; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 -; CHECK: %lcmp.overflow = icmp eq i32 %n, 0 -; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod -; CHECK: br i1 %lcmp.or, label %unr.cmp +; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; CHECK: %lcmp.overflow = icmp eq i32 %n, 0 +; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod +; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split -; CHECK: unr.cmp{{.*}}: -; CHECK: for.body.unr{{.*}}: -; CHECK: for.body: -; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body +; CHECK: for.body.prol: +; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ] +; CHECK: %prol.iter.sub = sub i32 %prol.iter, 1 +; CHECK: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0 +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split, !llvm.loop !0 define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { entry: @@ -39,7 +40,7 @@ for.end: ; preds = %for.body, %entry ; even if the -unroll-runtime is specified ; CHECK: for.body: -; CHECK-NOT: for.body.unr: +; CHECK-NOT: for.body.prol: define i32 @test1(i32* nocapture %a) nounwind uwtable readonly { entry: @@ -85,8 +86,8 @@ cond_true138: ; Test run-time unrolling for a loop that counts down by -2. 
-; CHECK: for.body.unr: -; CHECK: br i1 %cmp.7, label %for.cond.for.end_crit_edge{{.*}}, label %for.body +; CHECK: for.body.prol: +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly { entry: @@ -113,3 +114,7 @@ for.end: ; preds = %for.cond.for.end_cr %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] ret i16 %res.0.lcssa } + +; CHECK: !0 = metadata !{metadata !0, metadata !1} +; CHECK: !1 = metadata !{metadata !"llvm.loop.unroll.disable"} + diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll index ad99b8c..38b4f32 100644 --- a/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -1,11 +1,11 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=4 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s ; This tests that setting the unroll count works -; CHECK: unr.cmp: -; CHECK: for.body.unr: +; CHECK: for.body.prol: +; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split ; CHECK: for.body: -; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body +; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll index cbc7af5..7205c68 100644 --- a/test/Transforms/LoopUnroll/runtime-loop2.ll +++ b/test/Transforms/LoopUnroll/runtime-loop2.ll @@ -3,8 +3,7 @@ ; Choose a smaller, power-of-two, unroll count if the loop is too large. ; This test makes sure we're not unrolling 'odd' counts -; CHECK: unr.cmp: -; CHECK: for.body.unr: +; CHECK: for.body.prol: ; CHECK: for.body: ; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll index c3086e8..20161d7 100644 --- a/test/Transforms/LoopUnroll/scevunroll.ll +++ b/test/Transforms/LoopUnroll/scevunroll.ll @@ -66,16 +66,13 @@ exit2: ; SCEV properly unrolls multi-exit loops. ; -; SCEV cannot currently unroll this loop. -; It should ideally detect a trip count of 5. -; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops. ; CHECK-LABEL: @multiExit( -; CHECKFIXME: getelementptr i32* %base, i32 10 -; CHECKFIXME-NEXT: load i32* -; CHECKFIXME: br i1 false, label %l2.10, label %exit1 -; CHECKFIXME: l2.10: -; CHECKFIXME-NOT: br -; CHECKFIXME: ret i32 +; CHECK: getelementptr i32* %base, i32 10 +; CHECK-NEXT: load i32* +; CHECK: br i1 false, label %l2.10, label %exit1 +; CHECK: l2.10: +; CHECK-NOT: br +; CHECK: ret i32 define i32 @multiExit(i32* %base) nounwind { entry: br label %l1 diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll new file mode 100644 index 0000000..d593685 --- /dev/null +++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; When prologue is fully unrolled, the branch on its end is unconditional. 
+; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow, +; like in this example, where it comes from an argument. +; +; This test is based on an example from here: +; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out +; +; CHECK: while.body.prol: +; CHECK: br i1 +; CHECK: entry.split: + +; Function Attrs: nounwind readnone ssp uwtable +define i32 @foo(i32 %N) #0 { +entry: + br label %while.body + +while.body: ; preds = %while.body, %entry + %i = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %cmp = icmp eq i32 %i, %N + %inc = add i32 %i, 1 + br i1 %cmp, label %while.end, label %while.body + +while.end: ; preds = %while.body + ret i32 %i +} + +attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll new file mode 100644 index 0000000..db18f25 --- /dev/null +++ b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll @@ -0,0 +1,149 @@ +; RUN: opt < %s -loop-unroll -S | FileCheck %s +; +; Verify that the unrolling pass removes existing unroll count metadata +; and adds a disable unrolling node after unrolling is complete. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; #pragma clang loop vectorize(enable) unroll_count(4) vectorize_width(8) +; +; Unroll count metadata should be replaced with unroll(disable). Vectorize +; metadata should be untouched. +; +; CHECK-LABEL: @unroll_count_4( +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]] +define void @unroll_count_4(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: ; preds = %for.body + ret void +} +!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4} +!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} +!3 = metadata !{metadata !"llvm.loop.unroll.count", i32 4} +!4 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8} + +; #pragma clang loop unroll(full) +; +; An unroll disable metadata node is only added for the unroll count case. +; In this case, the loop has a full unroll metadata but can't be fully unrolled +; because the trip count is dynamic. The full unroll metadata should remain +; after unrolling. 
+; +; CHECK-LABEL: @unroll_full( +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]] +define void @unroll_full(i32* nocapture %a, i32 %b) { +entry: + %cmp3 = icmp sgt i32 %b, 0 + br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !5 + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %b + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5 + +for.end: ; preds = %for.body, %entry + ret void +} +!5 = metadata !{metadata !5, metadata !6} +!6 = metadata !{metadata !"llvm.loop.unroll.full"} + +; #pragma clang loop unroll(disable) +; +; Unroll metadata should not change. +; +; CHECK-LABEL: @unroll_disable( +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]] +define void @unroll_disable(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7 + +for.end: ; preds = %for.body + ret void +} +!7 = metadata !{metadata !7, metadata !8} +!8 = metadata !{metadata !"llvm.loop.unroll.disable"} + +; This function contains two loops which share the same llvm.loop metadata node +; with an llvm.loop.unroll.count 2 hint. Both loops should be unrolled. This +; verifies that adding disable metadata to a loop after unrolling doesn't affect +; other loops which previously shared the same llvm.loop metadata. 
+; +; CHECK-LABEL: @shared_metadata( +; CHECK: store i32 +; CHECK: store i32 +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]] +; CHECK: store i32 +; CHECK: store i32 +; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]] +define void @shared_metadata(i32* nocapture %List) #0 { +entry: + br label %for.body3 + +for.body3: ; preds = %for.body3, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds i32* %List, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add4 = add nsw i32 %0, 10 + store i32 %add4, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 4 + br i1 %exitcond, label %for.body3.1.preheader, label %for.body3, !llvm.loop !9 + +for.body3.1.preheader: ; preds = %for.body3 + br label %for.body3.1 + +for.body3.1: ; preds = %for.body3.1.preheader, %for.body3.1 + %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ] + %1 = add nsw i64 %indvars.iv.1, 1 + %arrayidx.1 = getelementptr inbounds i32* %List, i64 %1 + %2 = load i32* %arrayidx.1, align 4 + %add4.1 = add nsw i32 %2, 10 + store i32 %add4.1, i32* %arrayidx.1, align 4 + %exitcond.1 = icmp eq i64 %1, 4 + br i1 %exitcond.1, label %for.inc5.1, label %for.body3.1, !llvm.loop !9 + +for.inc5.1: ; preds = %for.body3.1 + ret void +} +!9 = metadata !{metadata !9, metadata !10} +!10 = metadata !{metadata !"llvm.loop.unroll.count", i32 2} + + +; CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[VEC_ENABLE:.*]], metadata ![[WIDTH_8:.*]], metadata ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[VEC_ENABLE]] = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} +; CHECK: ![[WIDTH_8]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 8} +; CHECK: ![[UNROLL_DISABLE]] = metadata !{metadata !"llvm.loop.unroll.disable"} +; CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_FULL:.*]]} +; CHECK: ![[UNROLL_FULL]] = metadata !{metadata !"llvm.loop.unroll.full"} +; CHECK: ![[LOOP_3]] = metadata !{metadata ![[LOOP_3]], metadata ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[LOOP_4]] = metadata !{metadata ![[LOOP_4]], metadata ![[UNROLL_DISABLE:.*]]} +; CHECK: ![[LOOP_5]] = metadata !{metadata ![[LOOP_5]], metadata ![[UNROLL_DISABLE:.*]]} diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll index 5e45a2d..1ca249d 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -1,4 +1,8 @@ ; RUN: opt < %s -loop-unroll -S | FileCheck %s +; RUN: opt < %s -loop-unroll -loop-unroll -S | FileCheck %s +; +; Run loop unrolling twice to verify that loop unrolling metadata is properly +; removed and further unrolling is disabled after the pass is run once. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -51,11 +55,11 @@ for.end: ; preds = %for.body ret void } !1 = metadata !{metadata !1, metadata !2} -!2 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false} +!2 = metadata !{metadata !"llvm.loop.unroll.disable"} ; loop64 has a high enough count that it should *not* be unrolled by ; the default unrolling heuristic. It serves as the control for the -; unroll(enable) pragma test loop64_with_.* tests below. +; unroll(full) pragma test loop64_with_.* tests below. 
; ; CHECK-LABEL: @loop64( ; CHECK: store i32 @@ -79,7 +83,7 @@ for.end: ; preds = %for.body ret void } -; #pragma clang loop unroll(enable) +; #pragma clang loop unroll(full) ; Loop should be fully unrolled. ; ; CHECK-LABEL: @loop64_with_enable( @@ -102,7 +106,7 @@ for.end: ; preds = %for.body ret void } !3 = metadata !{metadata !3, metadata !4} -!4 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true} +!4 = metadata !{metadata !"llvm.loop.unroll.full"} ; #pragma clang loop unroll_count(4) ; Loop should be unrolled 4 times. @@ -134,37 +138,7 @@ for.end: ; preds = %for.body !5 = metadata !{metadata !5, metadata !6} !6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4} - -; #pragma clang loop unroll_count(enable) unroll_count(4) -; Loop should be unrolled 4 times. -; -; CHECK-LABEL: @loop64_with_enable_and_count4( -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK-NOT: store i32 -; CHECK: br i1 -define void @loop64_with_enable_and_count4(i32* nocapture %a) { -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 - %inc = add nsw i32 %0, 1 - store i32 %inc, i32* %arrayidx, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 64 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7 - -for.end: ; preds = %for.body - ret void -} -!7 = metadata !{metadata !7, metadata !6, metadata !4} - -; #pragma clang loop unroll_count(enable) +; #pragma clang loop unroll(full) ; Full unrolling is requested, but loop has a dynamic trip count so ; no unrolling should occur. ; @@ -257,7 +231,7 @@ for.end: ; preds = %for.body !10 = metadata !{metadata !10, metadata !11} !11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1} -; #pragma clang loop unroll(enable) +; #pragma clang loop unroll(full) ; Loop has very high loop count (1 million) and full unrolling was requested. ; Loop should unrolled up to the pragma threshold, but not completely. ; diff --git a/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll new file mode 100644 index 0000000..adbf47d --- /dev/null +++ b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll @@ -0,0 +1,35 @@ +; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s +; Crasher from PR20987. + +; CHECK: define void @update_loop_info_in_subloops +; CHECK: entry: +; CHECK: L: +; CHECK: L.inner: +; CHECK: L.inner.latch: +; CHECK: L.latch: +; CHECK: L.inner.1: +; CHECK: L.inner.latch.1: +; CHECK: L.latch.1: + +define void @update_loop_info_in_subloops() { +entry: + br label %L + +L: + %0 = phi i64 [ 1, %entry ], [ %1, %L.latch ] + br label %L.inner + +L.inner: + br label %L.inner.latch + +L.inner.latch: + br i1 false, label %L.latch, label %L.inner + +L.latch: + %1 = add i64 %0, 1 + %2 = icmp eq i64 %1, 3 + br i1 %2, label %exit, label %L + +exit: + ret void +} -- cgit v1.1