Kill and collapse outstanding DomainValues.

DomainValues that are only used by "don't care" instructions are now collapsed to the first possible execution domain after all basic blocks have been processed. This typically means the PS domain on x86.

For example, the vsel_i64 and vsel_double functions in sse2-blend.ll are completely collapsed to the PS domain instead of containing a mix of execution domains created by isel.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144037 91177308-0d34-0410-b5e6-96231b3b80d8
author: Jakob Stoklund Olesen <stoklund@2pi.dk> 2011-11-07 23:08:21 +0000
committer: Jakob Stoklund Olesen <stoklund@2pi.dk> 2011-11-07 23:08:21 +0000
commit: b26c7727c9a45613d9bae69995cfd719c57c5614 (patch)
tree: fc4faf0805970b46a9cea0314099f74ee0e17897 /test
parent: a29fc806fe02cea76f7896b7e344bb919dd7ac25 (diff)
download: external_llvm-b26c7727c9a45613d9bae69995cfd719c57c5614.zip
external_llvm-b26c7727c9a45613d9bae69995cfd719c57c5614.tar.gz
external_llvm-b26c7727c9a45613d9bae69995cfd719c57c5614.tar.bz2
7 files changed, 44 insertions, 23 deletions
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 276209e..3fa1d95 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -315,24 +315,31 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
 
 
 define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
+  ; CHECK: test_x86_sse2_movnt_dq
   ; CHECK: movl
   ; CHECK: vmovntdq
-  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
+  ; add operation forces the execution domain.
+  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
+  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
 
 
 define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_movnt_pd
   ; CHECK: movl
   ; CHECK: vmovntpd
-  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
+  ; fadd operation forces the execution domain.
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_mul_sd
   ; CHECK: vmulsd
   %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -749,6 +756,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
 
 
 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: test_x86_sse2_storel_dq
   ; CHECK: movl
   ; CHECK: vmovq
   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
@@ -758,6 +766,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
 
 
 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+  ; CHECK: test_x86_sse2_storeu_dq
   ; CHECK: movl
   ; CHECK: vmovdqu
   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
@@ -767,15 +776,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
 
 
 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_storeu_pd
   ; CHECK: movl
   ; CHECK: vmovupd
-  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_sub_sd
   ; CHECK: vsubsd
   %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index 518c09c..cd37135 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -165,7 +165,9 @@ entry:
 ; CHECK: vpandn  %xmm
 define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %y = xor <2 x i64> %a, <i64 -1, i64 -1>
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
   %x = and <2 x i64> %a, %y
   ret <2 x i64> %x
 }
@@ -173,7 +175,9 @@ entry:
 ; CHECK: vpand %xmm
 define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %x = and <2 x i64> %a, %b
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = and <2 x i64> %a2, %b
   ret <2 x i64> %x
 }
 
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
index 1d09535..ae04435 100644
--- a/test/CodeGen/X86/nontemporal.ll
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -3,13 +3,16 @@
 define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
 ; CHECK: movntps
   %cast = bitcast i8* %B to <4 x float>*
-  store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+  %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
 ; CHECK: movntdq
   %cast1 = bitcast i8* %B to <2 x i64>*
-  store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+  %E2 = add <2 x i64> %E, <i64 1, i64 2>
+  store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
 ; CHECK: movntpd
   %cast2 = bitcast i8* %B to <2 x double>*
-  store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+  %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
+  store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
 ; CHECK: movnti
   %cast3 = bitcast i8* %B to i32*
   store i32 %D, i32* %cast3, align 16, !nontemporal !0
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index 04f2161..b6b0471 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 ; CHECK-NOT:     movapd
 ; CHECK:     movaps
-; CHECK-NOT:     movaps
-; CHECK:     movapd
+; CHECK-NOT:     movapd
+; CHECK:     movaps
 ; CHECK-NOT:     movap
 
 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 4ff1d03..2f4317b 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -26,11 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
   ret void
 }
 
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
 ; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
@@ -43,16 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
   ret void
 }
 
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
 ; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
 
-
 define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
   %A = load <4 x double>* %v1
   %B = load <4 x double>* %v2
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index d520d5c..1d74af2 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind {
 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
 	ret <2 x double> %tmp7
 ; CHECK: test11:
-; CHECK: movapd	4(%esp), %xmm0
+; CHECK: movaps	4(%esp), %xmm0
 }
 
 define void @test12() nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
index 2a48de2..d20b3e7 100644
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep movq    %t | count 1
-; RUN: grep pshufd  %t | count 1
-; RUN: grep movupd  %t | count 1
-; RUN: grep pshufhw %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
 
+; CHECK: test_v4sf
+; CHECK: movq 8(%esp)
+; CHECK: pshufd $80
 define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
 	%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1		; <<4 x float>> [#uses=1]
@@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
 	ret void
 }
 
+; CHECK: test_v2sd
+; CHECK: movups	8(%esp)
+; CHECK: movaps
 define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
 	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]
 	%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1		; <<2 x double>> [#uses=1]
@@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
 	ret void
 }
 
+; CHECK: test_v8i16
+; CHECK: pshufhw $-58
+; CHECK: movdqa
 define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
 	%tmp = load <2 x i64>* %A		; <<2 x i64>> [#uses=1]
 	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>		; <<8 x i16>> [#uses=8]
author	Jakob Stoklund Olesen <stoklund@2pi.dk>	2011-11-07 23:08:21 +0000
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>	2011-11-07 23:08:21 +0000
commit	b26c7727c9a45613d9bae69995cfd719c57c5614 (patch)
tree	fc4faf0805970b46a9cea0314099f74ee0e17897 /test
parent	a29fc806fe02cea76f7896b7e344bb919dd7ac25 (diff)
download	external_llvm-b26c7727c9a45613d9bae69995cfd719c57c5614.zip external_llvm-b26c7727c9a45613d9bae69995cfd719c57c5614.tar.gz external_llvm-b26c7727c9a45613d9bae69995cfd719c57c5614.tar.bz2