From e67a4afb5da59c02338622eea68e096ba143113f Mon Sep 17 00:00:00 2001
From: Vincent Lejeune
Date: Tue, 4 Jun 2013 23:17:15 +0000
Subject: R600: Const/Neg/Abs can be folded to dot4

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183278 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/R600/dot4-folding.ll | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 test/CodeGen/R600/dot4-folding.ll

(limited to 'test/CodeGen/R600/dot4-folding.ll')

diff --git a/test/CodeGen/R600/dot4-folding.ll b/test/CodeGen/R600/dot4-folding.ll
new file mode 100644
index 0000000..3e8330f
--- /dev/null
+++ b/test/CodeGen/R600/dot4-folding.ll
@@ -0,0 +1,27 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Both dp4 operands below are constant vectors, but exactly one of them can be
+; folded into dot4, so exactly 4 MOV instructions are expected and no more.
+; CHECK: @main
+; CHECK: MOV
+; CHECK: MOV
+; CHECK: MOV
+; CHECK: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+
+define void @main(float addrspace(1)* %out) {
+main_body:
+  %0 = load <4 x float> addrspace(8)* null ; first dp4 operand: the vector at the null base address
+  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) ; second dp4 operand: element 1 of the array based at null
+  %2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)
+  %3 = insertelement <4 x float> undef, float %2, i32 0 ; place the dp4 result in element 0
+  call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
+  ret void
+}
+
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+attributes #1 = { readnone }
-- 
cgit v1.1