diff options
Diffstat (limited to 'test/CodeGen/NVPTX/nounroll.ll')
-rw-r--r-- | test/CodeGen/NVPTX/nounroll.ll | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/test/CodeGen/NVPTX/nounroll.ll b/test/CodeGen/NVPTX/nounroll.ll new file mode 100644 index 0000000..db96d2a --- /dev/null +++ b/test/CodeGen/NVPTX/nounroll.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-unknown-unknown" + +; Compiled from the following CUDA code: +; +; #pragma nounroll +; for (int i = 0; i < 2; ++i) +; output[i] = input[i]; +define void @nounroll(float* %input, float* %output) { +; CHECK-LABEL: .visible .func nounroll( +entry: + br label %for.body + +for.body: +; CHECK: .pragma "nounroll" + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %idxprom = sext i32 %i.06 to i64 + %arrayidx = getelementptr inbounds float* %input, i64 %idxprom + %0 = load float* %arrayidx, align 4 +; CHECK: ld.f32 + %arrayidx2 = getelementptr inbounds float* %output, i64 %idxprom + store float %0, float* %arrayidx2, align 4 +; CHECK: st.f32 + %inc = add nuw nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 2 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 +; CHECK-NOT: ld.f32 +; CHECK-NOT: st.f32 + +for.end: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.unroll.disable"} |