From e6f7c267df11a44679c35dec79787fbc276839fb Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 25 Aug 2010 22:49:25 +0000 Subject: Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0 XMM0 XMM0 insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112101 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/v2f32.ll | 57 ++++++++++++++++++++++++++++--------- test/CodeGen/X86/widen_shuffle-1.ll | 8 ++++-- 2 files changed, 49 insertions(+), 16 deletions(-) (limited to 'test/CodeGen/X86') diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll index 9c4b773..76c3fdf 100644 --- a/test/CodeGen/X86/v2f32.ll +++ b/test/CodeGen/X86/v2f32.ll @@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind { store float %c, float* %P2 ret void ; X64: test1: -; X64-NEXT: addss %xmm1, %xmm0 -; X64-NEXT: movss %xmm0, (%rdi) +; X64-NEXT: pshufd $1, %xmm0, %xmm1 +; X64-NEXT: addss %xmm0, %xmm1 +; X64-NEXT: movss %xmm1, (%rdi) ; X64-NEXT: ret ; X32: test1: -; X32-NEXT: movss 4(%esp), 
%xmm0 -; X32-NEXT: addss 8(%esp), %xmm0 -; X32-NEXT: movl 12(%esp), %eax -; X32-NEXT: movss %xmm0, (%eax) +; X32-NEXT: pshufd $1, %xmm0, %xmm1 +; X32-NEXT: addss %xmm0, %xmm1 +; X32-NEXT: movl 4(%esp), %eax +; X32-NEXT: movss %xmm1, (%eax) ; X32-NEXT: ret } @@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw ret <2 x float> %Z ; X64: test2: -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: insertps $0 -; X64-NEXT: insertps $16 -; X64-NEXT: addps -; X64-NEXT: movaps -; X64-NEXT: pshufd +; X64-NEXT: addps %xmm1, %xmm0 ; X64-NEXT: ret } + + +define <2 x float> @test3(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + ret <2 x float> %C +; CHECK: test3: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <2 x float> @test4(<2 x float> %A) nounwind { + %C = fadd <2 x float> %A, %A + ret <2 x float> %C +; CHECK: test4: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + +define <4 x float> @test5(<4 x float> %A) nounwind { + %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> + %C = fadd <2 x float> %B, %B + br label %BB + +BB: + %D = fadd <2 x float> %C, %C + %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + ret <4 x float> %E + +; CHECK: _test5: +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: ret +} + + diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index 25dde57..463f522 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -3,7 +3,8 @@ ; widening shuffle v3float and then a add define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2> %val = fadd <3 x float> %x, %src2 @@ 
-15,7 +16,8 @@ entry: ; widening shuffle v3float with a different mask and then a add define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: -; CHECK: insertps +; CHECK: shuf2: +; CHECK: extractps ; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2> %val = fadd <3 x float> %x, %src2 @@ -26,7 +28,7 @@ entry: ; Example of when widening a v3float operation causes the DAG to replace a node ; with the operation that we are currently widening, i.e. when replacing ; opA with opB, the DAG will produce new operations with opA. -define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) { +define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind { entry: ; CHECK: pshufd %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> -- cgit v1.1