From 2c3e0051c31c3f5b2328b447eadf1cf9c4427442 Mon Sep 17 00:00:00 2001 From: Pirama Arumuga Nainar Date: Wed, 6 May 2015 11:46:36 -0700 Subject: Update aosp/master LLVM for rebase to r235153 Change-Id: I9bf53792f9fc30570e81a8d80d296c681d005ea7 (cherry picked from commit 0c7f116bb6950ef819323d855415b2f2b0aad987) --- .../BasicAA/2006-03-03-BadArraySubscript.ll | 2 +- test/Analysis/BasicAA/2008-04-15-Byval.ll | 2 +- test/Analysis/BasicAA/byval.ll | 2 +- test/Analysis/BlockFrequencyInfo/bad_input.ll | 3 +- .../BlockFrequencyInfo/loops_with_profile_info.ll | 204 +++++++++++++++++++++ test/Analysis/BranchProbabilityInfo/basic.ll | 28 +++ test/Analysis/CallGraph/2008-09-09-DirectCall.ll | 2 +- test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll | 198 ++++++++++++++++++++ .../DivergenceAnalysis/NVPTX/lit.local.cfg | 2 + test/Analysis/GlobalsModRef/volatile-instrs.ll | 2 +- test/Analysis/LazyCallGraph/basic.ll | 2 +- .../backward-dep-different-types.ll | 1 - test/Analysis/LoopAccessAnalysis/safe-no-checks.ll | 43 +++++ .../store-to-invariant-check1.ll | 53 ++++++ .../store-to-invariant-check2.ll | 54 ++++++ .../store-to-invariant-check3.ll | 53 ++++++ .../LoopAccessAnalysis/unsafe-and-rt-checks.ll | 1 - .../ScalarEvolution/2008-11-18-LessThanOrEqual.ll | 2 +- .../ScalarEvolution/2012-03-26-LoadConstant.ll | 2 +- .../ScalarEvolution/latch-dominating-conditions.ll | 55 ++++++ test/Analysis/ScalarEvolution/max-trip-count.ll | 2 +- .../Analysis/ScalarEvolution/zext-signed-addrec.ll | 2 +- .../ValueTracking/memory-dereferenceable.ll | 4 +- 23 files changed, 704 insertions(+), 15 deletions(-) create mode 100644 test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll create mode 100644 test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll create mode 100644 test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg create mode 100644 test/Analysis/LoopAccessAnalysis/safe-no-checks.ll create mode 100644 test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll create mode 100644 
test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll create mode 100644 test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll create mode 100644 test/Analysis/ScalarEvolution/latch-dominating-conditions.ll (limited to 'test/Analysis') diff --git a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll index eb05e1e..98161bf 100644 --- a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll +++ b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll @@ -26,7 +26,7 @@ no_exit: ; preds = %no_exit, %entry loopexit: ; preds = %no_exit, %entry %Y.0.1 = phi i32 [ 0, %entry ], [ %tmp.13, %no_exit ] ; [#uses=1] %tmp.4 = getelementptr [3 x [3 x i32]], [3 x [3 x i32]]* %X, i32 0, i32 0 ; <[3 x i32]*> [#uses=1] - %tmp.15 = call i32 (...)* @foo( [3 x i32]* %tmp.4, i32 %Y.0.1 ) ; [#uses=0] + %tmp.15 = call i32 (...) @foo( [3 x i32]* %tmp.4, i32 %Y.0.1 ) ; [#uses=0] ret void } diff --git a/test/Analysis/BasicAA/2008-04-15-Byval.ll b/test/Analysis/BasicAA/2008-04-15-Byval.ll index 9df12bd..9d4fd14 100644 --- a/test/Analysis/BasicAA/2008-04-15-Byval.ll +++ b/test/Analysis/BasicAA/2008-04-15-Byval.ll @@ -10,7 +10,7 @@ entry: %tmp = getelementptr %struct.x, %struct.x* %X, i32 0, i32 0 ; <[4 x i32]*> [#uses=1] %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 ; [#uses=1] store i32 2, i32* %tmp1, align 4 - %tmp2 = call i32 (...)* @bar( %struct.x* byval align 4 %X ) nounwind ; [#uses=0] + %tmp2 = call i32 (...) 
@bar( %struct.x* byval align 4 %X ) nounwind ; [#uses=0] br label %return return: ; preds = %entry ret void diff --git a/test/Analysis/BasicAA/byval.ll b/test/Analysis/BasicAA/byval.ll index edbe7b3..4f90c3f 100644 --- a/test/Analysis/BasicAA/byval.ll +++ b/test/Analysis/BasicAA/byval.ll @@ -6,7 +6,7 @@ target triple = "i686-apple-darwin8" define i32 @foo(%struct.x* byval %a) nounwind { ; CHECK: ret i32 1 - %tmp1 = tail call i32 (...)* @bar( %struct.x* %a ) nounwind ; [#uses=0] + %tmp1 = tail call i32 (...) @bar( %struct.x* %a ) nounwind ; [#uses=0] %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0 ; [#uses=2] store i32 1, i32* %tmp2, align 4 store i32 2, i32* @g, align 4 diff --git a/test/Analysis/BlockFrequencyInfo/bad_input.ll b/test/Analysis/BlockFrequencyInfo/bad_input.ll index da62dca..e5b1f50 100644 --- a/test/Analysis/BlockFrequencyInfo/bad_input.ll +++ b/test/Analysis/BlockFrequencyInfo/bad_input.ll @@ -32,7 +32,8 @@ define void @infinite_loop(i1 %x) { entry: br i1 %x, label %for.body, label %for.end, !prof !1 -; Check that the loop scale maxes out at 4096, giving 2048 here. +; Check that the infinite loop is arbitrarily scaled to max out at 4096, +; giving 2048 here. ; CHECK-NEXT: for.body: float = 2048.0, for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] diff --git a/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll b/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll new file mode 100644 index 0000000..534c4ad --- /dev/null +++ b/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll @@ -0,0 +1,204 @@ +; RUN: opt < %s -analyze -block-freq | FileCheck %s + +; This code contains three loops. One is triple-nested, the +; second is double-nested and the third is a single loop. At +; runtime, all three loops execute 1,000,000 times each. We used to +; give different frequencies to each of the loops because loop +; scales were limited to no more than 4,096. 
+; +; This was penalizing the hotness of the second and third loops +; because BFI was reducing the loop scale for for.cond16 and +; for.cond26 to a max of 4,096. +; +; Without this restriction, all loops are now correctly given the same +; frequency values. +; +; Original C code: +; +; +; int g; +; __attribute__((noinline)) void bar() { +; g++; +; } +; +; extern int printf(const char*, ...); +; +; int main() +; { +; int i, j, k; +; +; g = 0; +; for (i = 0; i < 100; i++) +; for (j = 0; j < 100; j++) +; for (k = 0; k < 100; k++) +; bar(); +; +; printf ("g = %d\n", g); +; g = 0; +; +; for (i = 0; i < 100; i++) +; for (j = 0; j < 10000; j++) +; bar(); +; +; printf ("g = %d\n", g); +; g = 0; +; +; +; for (i = 0; i < 1000000; i++) +; bar(); +; +; printf ("g = %d\n", g); +; g = 0; +; } + +@g = common global i32 0, align 4 +@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1 + +declare void @bar() +declare i32 @printf(i8*, ...) + +; CHECK: Printing analysis {{.*}} for function 'main': +; CHECK-NEXT: block-frequency-info: main +define i32 @main() { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %k = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* @g, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end12, !prof !1 + +for.body: ; preds = %for.cond + store i32 0, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc7, %for.body + %1 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %1, 100 + br i1 %cmp2, label %for.body3, label %for.end9, !prof !2 + +for.body3: ; preds = %for.cond1 + store i32 0, i32* %k, align 4 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %2 = load i32, i32* %k, align 4 + %cmp5 = icmp slt i32 %2, 100 + br i1 %cmp5, label %for.body6, label %for.end, !prof !3 + +; CHECK: 
- for.body6: float = 500000.5, int = 4000003 +for.body6: ; preds = %for.cond4 + call void @bar() + br label %for.inc + +for.inc: ; preds = %for.body6 + %3 = load i32, i32* %k, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, i32* %k, align 4 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc7 + +for.inc7: ; preds = %for.end + %4 = load i32, i32* %j, align 4 + %inc8 = add nsw i32 %4, 1 + store i32 %inc8, i32* %j, align 4 + br label %for.cond1 + +for.end9: ; preds = %for.cond1 + br label %for.inc10 + +for.inc10: ; preds = %for.end9 + %5 = load i32, i32* %i, align 4 + %inc11 = add nsw i32 %5, 1 + store i32 %inc11, i32* %i, align 4 + br label %for.cond + +for.end12: ; preds = %for.cond + %6 = load i32, i32* @g, align 4 + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6) + store i32 0, i32* @g, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond13 + +for.cond13: ; preds = %for.inc22, %for.end12 + %7 = load i32, i32* %i, align 4 + %cmp14 = icmp slt i32 %7, 100 + br i1 %cmp14, label %for.body15, label %for.end24, !prof !1 + +for.body15: ; preds = %for.cond13 + store i32 0, i32* %j, align 4 + br label %for.cond16 + +for.cond16: ; preds = %for.inc19, %for.body15 + %8 = load i32, i32* %j, align 4 + %cmp17 = icmp slt i32 %8, 10000 + br i1 %cmp17, label %for.body18, label %for.end21, !prof !4 + +; CHECK: - for.body18: float = 500000.5, int = 4000003 +for.body18: ; preds = %for.cond16 + call void @bar() + br label %for.inc19 + +for.inc19: ; preds = %for.body18 + %9 = load i32, i32* %j, align 4 + %inc20 = add nsw i32 %9, 1 + store i32 %inc20, i32* %j, align 4 + br label %for.cond16 + +for.end21: ; preds = %for.cond16 + br label %for.inc22 + +for.inc22: ; preds = %for.end21 + %10 = load i32, i32* %i, align 4 + %inc23 = add nsw i32 %10, 1 + store i32 %inc23, i32* %i, align 4 + br label %for.cond13 + +for.end24: ; preds = %for.cond13 + %11 = load i32, i32* @g, align 4 + %call25 = 
call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11) + store i32 0, i32* @g, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond26 + +for.cond26: ; preds = %for.inc29, %for.end24 + %12 = load i32, i32* %i, align 4 + %cmp27 = icmp slt i32 %12, 1000000 + br i1 %cmp27, label %for.body28, label %for.end31, !prof !5 + +; CHECK: - for.body28: float = 500000.5, int = 4000003 +for.body28: ; preds = %for.cond26 + call void @bar() + br label %for.inc29 + +for.inc29: ; preds = %for.body28 + %13 = load i32, i32* %i, align 4 + %inc30 = add nsw i32 %13, 1 + store i32 %inc30, i32* %i, align 4 + br label %for.cond26 + +for.end31: ; preds = %for.cond26 + %14 = load i32, i32* @g, align 4 + %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14) + store i32 0, i32* @g, align 4 + %15 = load i32, i32* %retval + ret i32 %15 +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"} +!1 = !{!"branch_weights", i32 101, i32 2} +!2 = !{!"branch_weights", i32 10001, i32 101} +!3 = !{!"branch_weights", i32 1000001, i32 10001} +!4 = !{!"branch_weights", i32 1000001, i32 101} +!5 = !{!"branch_weights", i32 1000001, i32 2} diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll index 0f66911..2c9c156 100644 --- a/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/test/Analysis/BranchProbabilityInfo/basic.ll @@ -212,3 +212,31 @@ exit: ret i32 %result } +define i32 @zero3(i32 %i, i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'zero3' +entry: +; AND'ing with a single bit bitmask essentially leads to a bool comparison, +; meaning we don't have probability information. 
+ %and = and i32 %i, 2 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %then, label %else +; CHECK: edge entry -> then probability is 16 / 32 +; CHECK: edge entry -> else probability is 16 / 32 + +then: +; AND'ing with other bitmask might be something else, so we still assume the +; usual probabilities. + %and2 = and i32 %i, 5 + %tobool2 = icmp eq i32 %and2, 0 + br i1 %tobool2, label %else, label %exit +; CHECK: edge then -> else probability is 12 / 32 +; CHECK: edge then -> exit probability is 20 / 32 + +else: + br label %exit + +exit: + %result = phi i32 [ %a, %then ], [ %b, %else ] + ret i32 %result +} + diff --git a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll index 595cc42..56eac49 100644 --- a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll +++ b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll @@ -12,6 +12,6 @@ entry: define void @caller() { entry: - call void (...)* @callee( void (...)* @callee ) + call void (...) @callee( void (...)* @callee ) unreachable } diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll new file mode 100644 index 0000000..9dd3d55 --- /dev/null +++ b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll @@ -0,0 +1,198 @@ +; RUN: opt %s -analyze -divergence | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +; return (n < 0 ? 
a + threadIdx.x : b + threadIdx.x) +define i32 @no_diverge(i32 %n, i32 %a, i32 %b) { +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'no_diverge' +entry: + %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + %cond = icmp slt i32 %n, 0 + br i1 %cond, label %then, label %else ; uniform +; CHECK-NOT: DIVERGENT: br i1 %cond, +then: + %a1 = add i32 %a, %tid + br label %merge +else: + %b2 = add i32 %b, %tid + br label %merge +merge: + %c = phi i32 [ %a1, %then ], [ %b2, %else ] + ret i32 %c +} + +; c = a; +; if (threadIdx.x < 5) // divergent: data dependent +; c = b; +; return c; // c is divergent: sync dependent +define i32 @sync(i32 %a, i32 %b) { +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'sync' +bb1: + %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() + %cond = icmp slt i32 %tid, 5 + br i1 %cond, label %bb2, label %bb3 +; CHECK: DIVERGENT: br i1 %cond, +bb2: + br label %bb3 +bb3: + %c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid +; CHECK: DIVERGENT: %c = + ret i32 %c +} + +; c = 0; +; if (threadIdx.x >= 5) { // divergent +; c = (n < 0 ? a : b); // c here is uniform because n is uniform +; } +; // c here is divergent because it is sync dependent on threadIdx.x >= 5 +; return c; +define i32 @mixed(i32 %n, i32 %a, i32 %b) { +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'mixed' +bb1: + %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() + %cond = icmp slt i32 %tid, 5 + br i1 %cond, label %bb6, label %bb2 +; CHECK: DIVERGENT: br i1 %cond, +bb2: + %cond2 = icmp slt i32 %n, 0 + br i1 %cond2, label %bb4, label %bb3 +bb3: + br label %bb5 +bb4: + br label %bb5 +bb5: + %c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ] +; CHECK-NOT: DIVERGENT: %c = + br label %bb6 +bb6: + %c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ] +; CHECK: DIVERGENT: %c2 = + ret i32 %c2 +} + +; We conservatively treat all parameters of a __device__ function as divergent. 
+define i32 @device(i32 %n, i32 %a, i32 %b) { +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'device' +; CHECK: DIVERGENT: i32 %n +; CHECK: DIVERGENT: i32 %a +; CHECK: DIVERGENT: i32 %b +entry: + %cond = icmp slt i32 %n, 0 + br i1 %cond, label %then, label %else +; CHECK: DIVERGENT: br i1 %cond, +then: + br label %merge +else: + br label %merge +merge: + %c = phi i32 [ %a, %then ], [ %b, %else ] + ret i32 %c +} + +; int i = 0; +; do { +; i++; // i here is uniform +; } while (i < laneid); +; return i == 10 ? 0 : 1; // i here is divergent +; +; The i defined in the loop is used outside. +define i32 @loop() { +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop' +entry: + %laneid = call i32 @llvm.ptx.read.laneid() + br label %loop +loop: + %i = phi i32 [ 0, %entry ], [ %i1, %loop ] +; CHECK-NOT: DIVERGENT: %i = + %i1 = add i32 %i, 1 + %exit_cond = icmp sge i32 %i1, %laneid + br i1 %exit_cond, label %loop_exit, label %loop +loop_exit: + %cond = icmp eq i32 %i, 10 + br i1 %cond, label %then, label %else +; CHECK: DIVERGENT: br i1 %cond, +then: + ret i32 0 +else: + ret i32 1 +} + +; Same as @loop, but the loop is in the LCSSA form. +define i32 @lcssa() { +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'lcssa' +entry: + %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + br label %loop +loop: + %i = phi i32 [ 0, %entry ], [ %i1, %loop ] +; CHECK-NOT: DIVERGENT: %i = + %i1 = add i32 %i, 1 + %exit_cond = icmp sge i32 %i1, %tid + br i1 %exit_cond, label %loop_exit, label %loop +loop_exit: + %i.lcssa = phi i32 [ %i, %loop ] +; CHECK: DIVERGENT: %i.lcssa = + %cond = icmp eq i32 %i.lcssa, 10 + br i1 %cond, label %then, label %else +; CHECK: DIVERGENT: br i1 %cond, +then: + ret i32 0 +else: + ret i32 1 +} + +; This test contains an unstructured loop. 
+; +-------------- entry ----------------+ +; | | +; V V +; i1 = phi(0, i3) i2 = phi(0, i3) +; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2 +; ^ | ^ +; | V | +; +-------- switch (tid / i3) ----------+ +; | +; V +; if (i3 == 5) // divergent +; because sync dependent on (tid / i3). +define i32 @unstructured_loop(i1 %entry_cond) { +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'unstructured_loop' +entry: + %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2 +loop_entry_1: + %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ] + %j1 = add i32 %i1, 1 + br label %loop_body +loop_entry_2: + %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ] + %j2 = add i32 %i2, 2 + br label %loop_body +loop_body: + %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ] + br label %loop_latch +loop_latch: + %div = sdiv i32 %tid, %i3 + switch i32 %div, label %branch [ i32 1, label %loop_entry_1 + i32 2, label %loop_entry_2 ] +branch: + %cmp = icmp eq i32 %i3, 5 + br i1 %cmp, label %then, label %else +; CHECK: DIVERGENT: br i1 %cmp, +then: + ret i32 0 +else: + ret i32 1 +} + +declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() +declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() +declare i32 @llvm.nvvm.read.ptx.sreg.tid.z() +declare i32 @llvm.ptx.read.laneid() + +!nvvm.annotations = !{!0, !1, !2, !3, !4} +!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1} +!1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1} +!2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1} +!3 = !{i32 ()* @loop, !"kernel", i32 1} +!4 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1} diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg b/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg new file mode 100644 index 0000000..2cb98eb --- /dev/null +++ b/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'NVPTX' in config.root.targets: + config.unsupported = True diff --git 
a/test/Analysis/GlobalsModRef/volatile-instrs.ll b/test/Analysis/GlobalsModRef/volatile-instrs.ll index a331bf3..5dd47bc 100644 --- a/test/Analysis/GlobalsModRef/volatile-instrs.ll +++ b/test/Analysis/GlobalsModRef/volatile-instrs.ll @@ -25,6 +25,6 @@ main_entry: %0 = load volatile i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0), align 4 store i32 %0, i32* @c, align 4 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind - %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind + %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind ret i32 0 } diff --git a/test/Analysis/LazyCallGraph/basic.ll b/test/Analysis/LazyCallGraph/basic.ll index 6e2cb90..7c13d2b 100644 --- a/test/Analysis/LazyCallGraph/basic.ll +++ b/test/Analysis/LazyCallGraph/basic.ll @@ -90,7 +90,7 @@ next: select i1 true, void ()* @f3, void ()* @f4 store void ()* @f5, void ()** %x call void @f6() - call void (void ()*, void ()*)* bitcast (void ()* @f7 to void (void ()*, void ()*)*)(void ()* @f8, void ()* @f9) + call void (void ()*, void ()*) bitcast (void ()* @f7 to void (void ()*, void ()*)*)(void ()* @f8, void ()* @f9) invoke void @f10() to label %exit unwind label %unwind exit: diff --git a/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll b/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll index 238f3f4..5d4fb7d 100644 --- a/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll +++ b/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll @@ -14,7 +14,6 @@ target triple = "x86_64-apple-macosx10.10.0" ; CHECK: Report: unsafe dependent memory operations in loop ; CHECK-NOT: Memory dependences are safe -@n = global i32 20, align 4 @B = common global i32* null, align 8 @A = 
common global i32* null, align 8 diff --git a/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll b/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll new file mode 100644 index 0000000..fa70c02 --- /dev/null +++ b/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll @@ -0,0 +1,43 @@ +; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s + +; If the arrays don't alias this loop is safe with no memchecks: +; for (i = 0; i < n; i++) +; A[i] = A[i+1] * B[i] * C[i]; + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; CHECK: Memory dependences are safe{{$}} + +define void @f(i16* noalias %a, + i16* noalias %b, + i16* noalias %c) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] + + %add = add nuw nsw i64 %ind, 1 + + %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a, i64 %add + %loadA_plus_2 = load i16, i16* %arrayidxA_plus_2, align 2 + + %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind + %loadB = load i16, i16* %arrayidxB, align 2 + + %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind + %loadC = load i16, i16* %arrayidxC, align 2 + + %mul = mul i16 %loadB, %loadA_plus_2 + %mul1 = mul i16 %mul, %loadC + + %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind + store i16 %mul1, i16* %arrayidxA, align 2 + + %exitcond = icmp eq i64 %add, 20 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll new file mode 100644 index 0000000..8ab8ab2 --- /dev/null +++ b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -loop-accesses -analyze | FileCheck %s + +; Test to confirm LAA will find store to invariant address. +; Inner loop has a store to invariant address. 
+; +; for(; i < itr; i++) { +; for(; j < itr; j++) { +; var1[i] = var2[j] + var1[i]; +; } +; } + +; CHECK: Store to invariant address was found in loop. +; CHECK-NOT: Store to invariant address was not found in loop. + +define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 { +entry: + %cmp20 = icmp eq i32 %itr, 0 + br i1 %cmp20, label %for.end10, label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc8 + %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ] + %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ] + %cmp218 = icmp ult i32 %j.022, %itr + br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23 + %0 = zext i32 %j.022 to i64 + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.body3.lr.ph + %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %2 = load i32, i32* %arrayidx5, align 4 + %add = add nsw i32 %2, %1 + store i32 %add, i32* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %itr + br i1 %exitcond, label %for.inc8, label %for.body3 + +for.inc8: ; preds = %for.body3, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ] + %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1 + %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32 + %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr + br i1 %exitcond26, label %for.end10, label %for.cond1.preheader + +for.end10: ; preds = %for.inc8, %entry + ret i32 undef +} + diff --git a/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll new file mode 
100644 index 0000000..4da0906 --- /dev/null +++ b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -loop-accesses -analyze | FileCheck %s + +; Test to confirm LAA will not find store to invariant address. +; Inner loop has no store to invariant address. +; +; for(; i < itr; i++) { +; for(; j < itr; j++) { +; var2[j] = var2[j] + var1[i]; +; } +; } + +; CHECK: Store to invariant address was not found in loop. +; CHECK-NOT: Store to invariant address was found in loop. + + +define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 { +entry: + %cmp20 = icmp eq i32 %itr, 0 + br i1 %cmp20, label %for.end10, label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc8 + %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ] + %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ] + %cmp218 = icmp ult i32 %j.022, %itr + br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23 + %0 = zext i32 %j.022 to i64 + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.body3.lr.ph + %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %2 = load i32, i32* %arrayidx5, align 4 + %add = add nsw i32 %2, %1 + store i32 %add, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %itr + br i1 %exitcond, label %for.inc8, label %for.body3 + +for.inc8: ; preds = %for.body3, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ] + %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1 + %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32 + %exitcond26 = icmp eq i32 
%lftr.wideiv25, %itr + br i1 %exitcond26, label %for.end10, label %for.cond1.preheader + +for.end10: ; preds = %for.inc8, %entry + ret i32 undef +} + diff --git a/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll new file mode 100644 index 0000000..18315a5 --- /dev/null +++ b/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -loop-accesses -analyze | FileCheck %s + +; Test to confirm LAA will find store to invariant address. +; Inner loop has a store to invariant address. +; +; for(; i < itr; i++) { +; for(; j < itr; j++) { +; var1[j] = ++var2[i] + var1[j]; +; } +; } + +; CHECK: Store to invariant address was found in loop. + +define void @foo(i32* nocapture %var1, i32* nocapture %var2, i32 %itr) #0 { +entry: + %cmp20 = icmp sgt i32 %itr, 0 + br i1 %cmp20, label %for.cond1.preheader, label %for.end11 + +for.cond1.preheader: ; preds = %entry, %for.inc9 + %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc9 ], [ 0, %entry ] + %j.022 = phi i32 [ %j.1.lcssa, %for.inc9 ], [ 0, %entry ] + %cmp218 = icmp slt i32 %j.022, %itr + br i1 %cmp218, label %for.body3.lr.ph, label %for.inc9 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv23 + %0 = sext i32 %j.022 to i64 + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.body3.lr.ph + %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] + %1 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* %arrayidx, align 4 + %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv + %2 = load i32, i32* %arrayidx5, align 4 + %add = add nsw i32 %inc, %2 + store i32 %add, i32* %arrayidx5, align 4 + %indvars.iv.next = add nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %itr + br i1 %exitcond, label 
%for.inc9, label %for.body3 + +for.inc9: ; preds = %for.body3, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ] + %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1 + %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32 + %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr + br i1 %exitcond26, label %for.end11, label %for.cond1.preheader + +for.end11: ; preds = %for.inc9, %entry + ret void +} diff --git a/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll b/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll index a11fd7f..ce8b86b 100644 --- a/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll +++ b/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll @@ -20,7 +20,6 @@ target triple = "x86_64-apple-macosx10.10.0" ; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a, i64 %add ; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %storemerge3 -@n = global i32 20, align 4 @B = common global i16* null, align 8 @A = common global i16* null, align 8 @C = common global i16* null, align 8 diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll index 46c6c59..84561c5 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll @@ -13,7 +13,7 @@ bb.nph: ; preds = %entry bb: ; preds = %bb.nph, %bb1 %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; [#uses=2] %argc_addr.04 = add i32 %indvar, %argc ; [#uses=1] - tail call void (...)* @Test() nounwind + tail call void (...) 
@Test() nounwind %1 = add i32 %argc_addr.04, 1 ; [#uses=1] br label %bb1 diff --git a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll index c4a4c30..33fcbab 100644 --- a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll +++ b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll @@ -14,7 +14,7 @@ entry: br label %lbl_818 lbl_818: ; preds = %for.end, %entry - call void (...)* @func_27() + call void (...) @func_27() store i32 0, i32* @g_814, align 4 br label %for.cond diff --git a/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll b/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll new file mode 100644 index 0000000..3f6f958 --- /dev/null +++ b/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll @@ -0,0 +1,55 @@ +; RUN: opt -S -indvars < %s | FileCheck %s + +declare void @side_effect(i1) + +define void @latch_dominating_0(i8 %start) { +; CHECK-LABEL: latch_dominating_0 + entry: + %e = icmp slt i8 %start, 42 + br i1 %e, label %loop, label %exit + + loop: +; CHECK-LABEL: loop + %idx = phi i8 [ %start, %entry ], [ %idx.inc, %be ] + %idx.inc = add i8 %idx, 1 + %folds.to.true = icmp slt i8 %idx, 42 +; CHECK: call void @side_effect(i1 true) + call void @side_effect(i1 %folds.to.true) + %c0 = icmp slt i8 %idx.inc, 42 + br i1 %c0, label %be, label %exit + + be: +; CHECK: call void @side_effect(i1 true) + call void @side_effect(i1 %folds.to.true) + %c1 = icmp slt i8 %idx.inc, 100 + br i1 %c1, label %loop, label %exit + + exit: + ret void +} + +define void @latch_dominating_1(i8 %start) { +; CHECK-LABEL: latch_dominating_1 + entry: + %e = icmp slt i8 %start, 42 + br i1 %e, label %loop, label %exit + + loop: +; CHECK-LABEL: loop + %idx = phi i8 [ %start, %entry ], [ %idx.inc, %be ] + %idx.inc = add i8 %idx, 1 + %does.not.fold.to.true = icmp slt i8 %idx, 42 +; CHECK: call void @side_effect(i1 %does.not.fold.to.true) + call void @side_effect(i1 %does.not.fold.to.true) + %c0 
= icmp slt i8 %idx.inc, 42 + br i1 %c0, label %be, label %be + + be: +; CHECK: call void @side_effect(i1 %does.not.fold.to.true) + call void @side_effect(i1 %does.not.fold.to.true) + %c1 = icmp slt i8 %idx.inc, 100 + br i1 %c1, label %loop, label %exit + + exit: + ret void +} diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index 72560c7..614e9b2 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -65,7 +65,7 @@ for.inc: ; preds = %for.body br label %for.cond for.end: ; preds = %for.body, %for.cond - %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; [#uses=0] + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; [#uses=0] ret i32 0 } diff --git a/test/Analysis/ScalarEvolution/zext-signed-addrec.ll b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll index 31ebb3e..2b12b33 100644 --- a/test/Analysis/ScalarEvolution/zext-signed-addrec.ll +++ b/test/Analysis/ScalarEvolution/zext-signed-addrec.ll @@ -63,7 +63,7 @@ for.cond.for.end9_crit_edge: ; preds = %for.inc8 for.end9: ; preds = %entry.for.end9_crit_edge, %for.cond.for.end9_crit_edge %3 = phi i32 [ %.pre, %entry.for.end9_crit_edge ], [ %shl, %for.cond.for.end9_crit_edge ] - %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %3) #2 + %call = tail call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %3) #2 br label %return return.loopexit.split: ; preds = %for.cond1.preheader.lr.ph diff --git a/test/Analysis/ValueTracking/memory-dereferenceable.ll b/test/Analysis/ValueTracking/memory-dereferenceable.ll index 51f9265..bfee5c7 100644 --- a/test/Analysis/ValueTracking/memory-dereferenceable.ll +++ b/test/Analysis/ValueTracking/memory-dereferenceable.ll @@ -9,7 +9,7 @@ declare zeroext i1 @return_i1() @globalstr = global [6 x i8] c"hello\00" -define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) { +define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" { ; CHECK: The following are dereferenceable: ; CHECK: %globalptr ; CHECK: %alloca @@ -22,7 +22,7 @@ entry: %alloca = alloca i1 %load2 = load i1, i1* %alloca %load3 = load i32, i32 addrspace(1)* %dparam - %tok = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam) + %tok = tail call i32 (i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam) %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %tok, i32 4, i32 4) %load4 = load i32, i32 addrspace(1)* %relocate %nparam = getelementptr i32, i32 addrspace(1)* %dparam, i32 5 -- cgit v1.1