diff options
author | Pirama Arumuga Nainar <pirama@google.com> | 2015-04-10 22:08:18 +0000 |
---|---|---|
committer | Android Git Automerger <android-git-automerger@android.com> | 2015-04-10 22:08:18 +0000 |
commit | 13a7db5b9c4f5e543d037be68ec3428216bfd550 (patch) | |
tree | 1b2c9792582e12f5af0b1512e3094425f0dc0df9 /test/CodeGen/R600/mad-combine.ll | |
parent | 0eb46f5d1e06a4284663d636a74b06adc3a161d7 (diff) | |
parent | 31195f0bdca6ee2a5e72d07edf13e1d81206d949 (diff) | |
download | external_llvm-13a7db5b9c4f5e543d037be68ec3428216bfd550.zip external_llvm-13a7db5b9c4f5e543d037be68ec3428216bfd550.tar.gz external_llvm-13a7db5b9c4f5e543d037be68ec3428216bfd550.tar.bz2 |
am 31195f0b: Merge "Update aosp/master llvm for rebase to r233350"
* commit '31195f0bdca6ee2a5e72d07edf13e1d81206d949':
Update aosp/master llvm for rebase to r233350
Diffstat (limited to 'test/CodeGen/R600/mad-combine.ll')
-rw-r--r-- | test/CodeGen/R600/mad-combine.ll | 276 |
1 files changed, 138 insertions, 138 deletions
diff --git a/test/CodeGen/R600/mad-combine.ll b/test/CodeGen/R600/mad-combine.ll index 8c4e09b..bc07162 100644 --- a/test/CodeGen/R600/mad-combine.ll +++ b/test/CodeGen/R600/mad-combine.ll @@ -32,14 +32,14 @@ declare float @llvm.fmuladd.f32(float, float, float) #0 ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 %mul = fmul float %a, %b %fma = fadd float %mul, %c @@ -69,17 +69,17 @@ define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrsp ; SI: s_endpgm define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 - - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 - %d = load float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 + + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 + %d = load float, float addrspace(1)* %gep.3 %mul = fmul float %a, %b %fma0 = fadd float %mul, %c @@ -105,14 +105,14 @@ define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float a ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 %mul = fmul float %a, %b %fma = fadd float %c, %mul @@ -135,14 +135,14 @@ define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrsp ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 %mul = fmul float %a, %b %fma = fsub float %mul, %c @@ -172,17 +172,17 @@ define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float a ; SI: s_endpgm define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 - - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 - %d = load float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 + + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 + %d = load float, float addrspace(1)* %gep.3 %mul = fmul float %a, %b %fma0 = fsub float %mul, %c @@ -207,14 +207,14 @@ define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, fl ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 %mul = fmul float %a, %b %fma = fsub float %c, %mul @@ -243,17 +243,17 @@ define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float a ; SI: s_endpgm define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 - - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 - %d = load float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 + + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 + %d = load float, float addrspace(1)* %gep.3 %mul = fmul float %a, %b %fma0 = fsub float %c, %mul @@ -279,14 +279,14 @@ define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, fl ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 %mul = fmul float %a, %b %mul.neg = fsub float -0.0, %mul @@ -317,17 +317,17 @@ define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float a ; SI: s_endpgm define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 - - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 - %d = load float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 + + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 + %d = load float, float addrspace(1)* %gep.3 %mul = fmul float %a, %b %mul.neg = fsub float -0.0, %mul @@ -360,17 +360,17 @@ define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %ou ; SI: s_endpgm define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 - - %a = load float addrspace(1)* %gep.0 - %b = load float addrspace(1)* %gep.1 - %c = load float addrspace(1)* %gep.2 - %d = load float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 + + %a = load float, float addrspace(1)* %gep.0 + %b = load float, float addrspace(1)* %gep.1 + %c = load float, float addrspace(1)* %gep.2 + %d = load float, float addrspace(1)* %gep.3 %mul = fmul float %a, %b %mul.neg = fsub float -0.0, %mul @@ -405,18 +405,18 @@ define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %ou ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 - %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4 - %gep.out = getelementptr float addrspace(1)* %out, i32 %tid - - %x = load float addrspace(1)* %gep.0 - %y = load float addrspace(1)* %gep.1 - %z = load float addrspace(1)* %gep.2 - %u = load float addrspace(1)* %gep.3 - %v = load float addrspace(1)* %gep.4 + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + + %x = load float, float addrspace(1)* %gep.0 + %y = load float, float addrspace(1)* %gep.1 + %z = load float, float addrspace(1)* %gep.2 + %u = load float, float addrspace(1)* %gep.3 + %v = load float, float addrspace(1)* %gep.4 %tmp0 = fmul float %u, %v %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0 @@ -451,18 +451,18 @@ define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %o ; SI: s_endpgm define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 - %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4 - %gep.out = getelementptr float addrspace(1)* %out, i32 %tid - - %x = load float addrspace(1)* %gep.0 - %y = load float addrspace(1)* %gep.1 - %z = load float addrspace(1)* %gep.2 - %u = load float addrspace(1)* %gep.3 - %v = load float addrspace(1)* %gep.4 + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + + %x = load float, float addrspace(1)* %gep.0 + %y = load float, float addrspace(1)* %gep.1 + %z = load float, float addrspace(1)* %gep.2 + %u = load float, float addrspace(1)* %gep.3 + %v = load float, float addrspace(1)* %gep.4 %tmp0 = fmul float %u, %v %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0 @@ -496,18 +496,18 @@ define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %o ; SI: s_endpgm define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 - %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4 - %gep.out = getelementptr float addrspace(1)* %out, i32 %tid - - %x = load float addrspace(1)* %gep.0 - %y = load float addrspace(1)* %gep.1 - %z = load float addrspace(1)* %gep.2 - %u = load float addrspace(1)* %gep.3 - %v = load float addrspace(1)* %gep.4 + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + + %x = load float, float addrspace(1)* %gep.0 + %y = load float, float addrspace(1)* %gep.1 + %z = load float, float addrspace(1)* %gep.2 + %u = load float, float addrspace(1)* %gep.3 + %v = load float, float addrspace(1)* %gep.4 %tmp0 = fmul float %u, %v %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0 @@ -542,18 +542,18 @@ define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %o ; SI: s_endpgm define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { %tid = tail call i32 @llvm.r600.read.tidig.x() #0 - %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 - %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4 - %gep.out = getelementptr float addrspace(1)* %out, i32 %tid - - %x = load float addrspace(1)* %gep.0 - %y = load float addrspace(1)* %gep.1 - %z = load float addrspace(1)* %gep.2 - %u = load float addrspace(1)* %gep.3 - %v = load float addrspace(1)* %gep.4 + %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + + %x = load float, float addrspace(1)* %gep.0 + %y = load float, float addrspace(1)* %gep.1 + %z = load float, float addrspace(1)* %gep.2 + %u = load float, float addrspace(1)* %gep.3 + %v = load float, float addrspace(1)* %gep.4 %tmp0 = fmul float %u, %v %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0 |