From abc019968067736a499467f7db7fb758a425ca06 Mon Sep 17 00:00:00 2001
From: Nate Begeman <natebegeman@mac.com>
Date: Fri, 5 Jun 2009 21:37:30 +0000
Subject: Adapt the x86 build_vector dagcombine to the current state of the
 legalizer. Build vectors with i64 elements will only appear on 32-bit x86
 before legalization. Since vector widening occurs during legalization and
 produces i64 build_vector elements, the dag combiner is never run on these
 before the legalizer splits them into 32-bit elements.

Teach the build_vector dag combine in x86 back end to recognize consecutive
loads producing the low part of the vector.

Convert the two uses of TLI's consecutive-load recognizer to pass LoadSDNodes,
since load nodes were already required implicitly.

Add a testcase for the transform.

Old:
	subl	$28, %esp
	movl	32(%esp), %eax
	movl	4(%eax), %ecx
	movl	%ecx, 4(%esp)
	movl	(%eax), %eax
	movl	%eax, (%esp)
	movaps	(%esp), %xmm0
	pmovzxwd	%xmm0, %xmm0
	movl	36(%esp), %eax
	movaps	%xmm0, (%eax)
	addl	$28, %esp
	ret

New:
	movl	4(%esp), %eax
	pmovzxwd	(%eax), %xmm0
	movl	8(%esp), %eax
	movaps	%xmm0, (%eax)
	ret




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@72957 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/dagcombine-buildvector.ll | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'test/CodeGen/X86/dagcombine-buildvector.ll')
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index c89a296..b96fdfc 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,13 +1,25 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
 ; RUN: grep unpcklpd %t | count 1
 ; RUN: grep movapd %t | count 1
+; RUN: grep movaps %t | count 1
 
 ; Shows a dag combine bug that will generate an illegal build vector
 ; with v2i64 build_vector i32, i32.
 
-define void @test(<2 x double>* %dst, <4 x double> %src) {
+define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
 entry:
         %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
         store <2 x double> %tmp7.i, <2 x double>* %dst
         ret void
 }
+
+define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
+entry:
+        %tmp1 = load <4 x i16>* %src
+        %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+        %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
+        store <4 x i32> %0, <4 x i32>* %dest
+        ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
-- 
cgit v1.1