aboutsummaryrefslogtreecommitdiffstats
path: root/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll')
-rw-r--r--test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll198
1 files changed, 198 insertions, 0 deletions
diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
new file mode 100644
index 0000000..9dd3d55
--- /dev/null
+++ b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
@@ -0,0 +1,198 @@
+; RUN: opt %s -analyze -divergence | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
+define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'no_diverge'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %cond = icmp slt i32 %n, 0
+ br i1 %cond, label %then, label %else ; uniform
+; CHECK-NOT: DIVERGENT: br i1 %cond,
+then:
+ %a1 = add i32 %a, %tid
+ br label %merge
+else:
+ %b2 = add i32 %b, %tid
+ br label %merge
+merge:
+ %c = phi i32 [ %a1, %then ], [ %b2, %else ]
+ ret i32 %c
+}
+
+; c = a;
+; if (threadIdx.x < 5) // divergent: data dependent
+; c = b;
+; return c; // c is divergent: sync dependent
+define i32 @sync(i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'sync'
+bb1:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ %cond = icmp slt i32 %tid, 5
+ br i1 %cond, label %bb2, label %bb3
+; CHECK: DIVERGENT: br i1 %cond,
+bb2:
+ br label %bb3
+bb3:
+ %c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid
+; CHECK: DIVERGENT: %c =
+ ret i32 %c
+}
+
+; c = 0;
+; if (threadIdx.x >= 5) { // divergent
+; c = (n < 0 ? a : b); // c here is uniform because n is uniform
+; }
+; // c here is divergent because it is sync dependent on threadIdx.x >= 5
+; return c;
+define i32 @mixed(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'mixed'
+bb1:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ %cond = icmp slt i32 %tid, 5
+ br i1 %cond, label %bb6, label %bb2
+; CHECK: DIVERGENT: br i1 %cond,
+bb2:
+ %cond2 = icmp slt i32 %n, 0
+ br i1 %cond2, label %bb4, label %bb3
+bb3:
+ br label %bb5
+bb4:
+ br label %bb5
+bb5:
+ %c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ]
+; CHECK-NOT: DIVERGENT: %c =
+ br label %bb6
+bb6:
+ %c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ]
+; CHECK: DIVERGENT: %c2 =
+ ret i32 %c2
+}
+
+; We conservatively treats all parameters of a __device__ function as divergent.
+define i32 @device(i32 %n, i32 %a, i32 %b) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'device'
+; CHECK: DIVERGENT: i32 %n
+; CHECK: DIVERGENT: i32 %a
+; CHECK: DIVERGENT: i32 %b
+entry:
+ %cond = icmp slt i32 %n, 0
+ br i1 %cond, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cond,
+then:
+ br label %merge
+else:
+ br label %merge
+merge:
+ %c = phi i32 [ %a, %then ], [ %b, %else ]
+ ret i32 %c
+}
+
+; int i = 0;
+; do {
+; i++; // i here is uniform
+; } while (i < laneid);
+; return i == 10 ? 0 : 1; // i here is divergent
+;
+; The i defined in the loop is used outside.
+define i32 @loop() {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop'
+entry:
+ %laneid = call i32 @llvm.ptx.read.laneid()
+ br label %loop
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
+; CHECK-NOT: DIVERGENT: %i =
+ %i1 = add i32 %i, 1
+ %exit_cond = icmp sge i32 %i1, %laneid
+ br i1 %exit_cond, label %loop_exit, label %loop
+loop_exit:
+ %cond = icmp eq i32 %i, 10
+ br i1 %cond, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cond,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+; Same as @loop, but the loop is in the LCSSA form.
+define i32 @lcssa() {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'lcssa'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ br label %loop
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
+; CHECK-NOT: DIVERGENT: %i =
+ %i1 = add i32 %i, 1
+ %exit_cond = icmp sge i32 %i1, %tid
+ br i1 %exit_cond, label %loop_exit, label %loop
+loop_exit:
+ %i.lcssa = phi i32 [ %i, %loop ]
+; CHECK: DIVERGENT: %i.lcssa =
+ %cond = icmp eq i32 %i.lcssa, 10
+ br i1 %cond, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cond,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+; This test contains an unstructured loop.
+; +-------------- entry ----------------+
+; | |
+; V V
+; i1 = phi(0, i3) i2 = phi(0, i3)
+; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2
+; ^ | ^
+; | V |
+; +-------- switch (tid / i3) ----------+
+; |
+; V
+; if (i3 == 5) // divergent
+; because sync dependent on (tid / i3).
+define i32 @unstructured_loop(i1 %entry_cond) {
+; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'unstructured_loop'
+entry:
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
+loop_entry_1:
+ %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
+ %j1 = add i32 %i1, 1
+ br label %loop_body
+loop_entry_2:
+ %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ]
+ %j2 = add i32 %i2, 2
+ br label %loop_body
+loop_body:
+ %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ]
+ br label %loop_latch
+loop_latch:
+ %div = sdiv i32 %tid, %i3
+ switch i32 %div, label %branch [ i32 1, label %loop_entry_1
+ i32 2, label %loop_entry_2 ]
+branch:
+ %cmp = icmp eq i32 %i3, 5
+ br i1 %cmp, label %then, label %else
+; CHECK: DIVERGENT: br i1 %cmp,
+then:
+ ret i32 0
+else:
+ ret i32 1
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+declare i32 @llvm.ptx.read.laneid()
+
+!nvvm.annotations = !{!0, !1, !2, !3, !4}
+!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}
+!1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1}
+!2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1}
+!3 = !{i32 ()* @loop, !"kernel", i32 1}
+!4 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}