diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2013-11-15 04:42:23 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2013-11-15 04:42:23 +0000 |
commit | 509a492442b7e889d615d3b451629c81a810aef1 (patch) | |
tree | d4facf4559ba4d9a84d7792f6e697ea2a7909d2d /test | |
parent | a0846f414366462d24e65b48deea6b2b85f6616b (diff) | |
download | external_llvm-509a492442b7e889d615d3b451629c81a810aef1.zip external_llvm-509a492442b7e889d615d3b451629c81a810aef1.tar.gz external_llvm-509a492442b7e889d615d3b451629c81a810aef1.tar.bz2 |
Add target hook to prevent folding some bitcasted loads.
This is to avoid this transformation in some cases:
fold (conv (load x)) -> (load (conv*)x)
On architectures that don't natively support some vector
loads efficiently, casting the load to a smaller vector of
larger-typed elements and loading that is more efficient.
Patch by Micah Villmow.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194783 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/R600/combine_vloads.ll | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/test/CodeGen/R600/combine_vloads.ll b/test/CodeGen/R600/combine_vloads.ll new file mode 100644 index 0000000..f8ec712 --- /dev/null +++ b/test/CodeGen/R600/combine_vloads.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s + +; +; kernel void combine_vloads(global char8* src, global char8* result) { +; for (int i = 0; i < 1024; ++i) +; result[i] = src[0] + src[1] + src[2] + src[3]; +; } +; + + +; 128-bit loads instead of many 8-bit +; EG-LABEL: @combine_vloads: +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind { +entry: + br label %for.body + +for.exit: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ] + %arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)* + %0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)* + %vecload2 = load <8 x i32> addrspace(1)* %0, align 32 + %1 = bitcast <8 x i32> %vecload2 to <32 x i8> + %tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %tmp9 = add nsw <8 x i8> %tmp5, %tmp8 + %tmp12 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> + %tmp13 = add nsw <8 x i8> %tmp9, %tmp12 + %tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + %tmp17 = add nsw <8 x i8> %tmp13, %tmp16 + %scevgep = getelementptr <8 x i8> addrspace(1)* %result, i32 %i.01 + %2 = bitcast <8 x i8> %tmp17 to <2 x i32> + %3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)* + store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8 + 
%tmp19 = add nsw i32 %i.01, 1 + %exitcond = icmp eq i32 %tmp19, 1024 + br i1 %exitcond, label %for.exit, label %for.body +} |