author     Craig Topper <craig.topper@gmail.com>  2011-11-15 05:55:35 +0000
committer  Craig Topper <craig.topper@gmail.com>  2011-11-15 05:55:35 +0000
commit     4c077a1f04c97210793d62debef250b974d168bc (patch)
tree       01c8dd62cb4c1a0d7ca4471ccd5736daf687b8dc /test
parent     283b419aea736f899d1e0de70b2c0355d51d6826 (diff)
Properly qualify AVX2-specific parts of the execution dependency table. Also enable converting between 256-bit PS/PD operations when AVX1 is enabled. Fixes PR11370.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144622 91177308-0d34-0410-b5e6-96231b3b80d8
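The test changes below all follow one pattern: each bitwise op on a floating-point-typed vector gains a trailing add/fadd so that the result has a consumer in a specific execution domain, which is what the execution dependency fix uses when it rewrites an instruction to a different-domain equivalent. A minimal sketch of the pattern (the function name @domain_sketch is hypothetical, not part of this commit): without the fadd, the AND of the bitcast values has no domain-specific use and the backend may emit either the PS or PD form; the fadd makes the double domain the cheapest choice, so a check for vandpd becomes stable.

; Hypothetical sketch, mirroring the pattern added in avx-logic.ll below.
; The fadd gives the bitcast result a double-domain consumer, steering the
; execution dependency fix toward vandpd rather than vandps.
define <4 x double> @domain_sketch(<4 x double> %x, <4 x double> %y) nounwind {
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}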
Diffstat (limited to 'test')
 test/CodeGen/X86/avx-intrinsics-x86.ll | 23
 test/CodeGen/X86/avx-logic.ll          | 32
 2 files changed, 41 insertions(+), 14 deletions(-)
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 3fa1d95..df12b71 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -2021,7 +2021,9 @@ declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
 
 define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
   ; CHECK: vmovdqu
-  %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
+  %a1 = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
+  ; add operation forces the execution domain.
+  %res = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   ret <32 x i8> %res
 }
 declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
@@ -2029,7 +2031,9 @@ declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
 
 define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
   ; CHECK: vmovupd
-  %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
+  %a1 = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
+  ; add operation forces the execution domain.
+  %res = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
   ret <4 x double> %res
 }
 declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly
@@ -2157,7 +2161,9 @@ declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
 
 define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
   ; CHECK: vmovntdq
-  call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1)
+  ; add operation forces the execution domain.
+  %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
+  call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a2)
   ret void
 }
 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
@@ -2165,7 +2171,8 @@ declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
 
 define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
   ; CHECK: vmovntpd
-  call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1)
+  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a2)
   ret void
 }
 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
@@ -2258,7 +2265,9 @@ declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
 
 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
   ; CHECK: vmovdqu
-  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
+  ; add operation forces the execution domain.
+  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
   ret void
 }
 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
@@ -2266,7 +2275,9 @@ declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
 
 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
   ; CHECK: vmovupd
-  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1)
+  ; add operation forces the execution domain.
+  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
   ret void
 }
 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index cd37135..115cefb 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -7,7 +7,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %and.i = and <4 x i64> %0, %1
   %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vandpd LCP{{.*}}(%rip)
@@ -16,7 +18,9 @@ entry:
   %0 = bitcast <4 x double> %y to <4 x i64>
   %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
   %1 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %1
+  ; add forces execution domain
+  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %2
 }
 
 ; CHECK: vandps
@@ -45,7 +49,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %xor.i = xor <4 x i64> %0, %1
   %2 = bitcast <4 x i64> %xor.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vxorpd LCP{{.*}}(%rip)
@@ -54,7 +60,9 @@ entry:
   %0 = bitcast <4 x double> %y to <4 x i64>
   %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
   %1 = bitcast <4 x i64> %xor.i to <4 x double>
-  ret <4 x double> %1
+  ; add forces execution domain
+  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %2
 }
 
 ; CHECK: vxorps
@@ -83,7 +91,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %or.i = or <4 x i64> %0, %1
   %2 = bitcast <4 x i64> %or.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vorpd LCP{{.*}}(%rip)
@@ -92,7 +102,9 @@ entry:
   %0 = bitcast <4 x double> %y to <4 x i64>
   %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
   %1 = bitcast <4 x i64> %or.i to <4 x double>
-  ret <4 x double> %1
+  ; add forces execution domain
+  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %2
 }
 
 ; CHECK: vorps
@@ -122,7 +134,9 @@ entry:
   %1 = bitcast <4 x double> %y to <4 x i64>
   %and.i = and <4 x i64> %1, %neg.i
   %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vandnpd (%
@@ -134,7 +148,9 @@ entry:
   %1 = bitcast <4 x double> %tmp2 to <4 x i64>
   %and.i = and <4 x i64> %1, %neg.i
   %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
+  ; add forces execution domain
+  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+  ret <4 x double> %3
 }
 
 ; CHECK: vandnps
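For reference, tests in this directory are driven by llc piped into FileCheck; the RUN lines sit at the top of each file and are untouched by (and outside) the hunks above. A typical RUN line for these AVX tests looks like the following (assumed here for illustration, not quoted from this diff):

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s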