diff options
author | Nadav Rotem <nadav.rotem@intel.com> | 2012-01-02 08:05:46 +0000 |
---|---|---|
committer | Nadav Rotem <nadav.rotem@intel.com> | 2012-01-02 08:05:46 +0000 |
commit | a46f35d3d65425af5eaaaf906fca240a33d6c362 (patch) | |
tree | cbe1de0c9521b4a7bef38d6733365f4c72ab619b /test | |
parent | 47f79bb58e42f1a08a7f388b8b1596ded7d49bbb (diff) | |
download | external_llvm-a46f35d3d65425af5eaaaf906fca240a33d6c362.zip external_llvm-a46f35d3d65425af5eaaaf906fca240a33d6c362.tar.gz external_llvm-a46f35d3d65425af5eaaaf906fca240a33d6c362.tar.bz2 |
Optimize the sequence blend(sign_extend(x)) to blend(shl(x)) since SSE blend instructions only look at the highest bit.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147426 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/2011-12-28-vselecti8.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/X86/sext-blend.ll | 15 | ||||
-rw-r--r-- | test/CodeGen/X86/sse2-blend.ll | 16 | ||||
-rw-r--r-- | test/CodeGen/X86/sse41-blend.ll | 4 |
4 files changed, 31 insertions, 10 deletions
```diff
diff --git a/test/CodeGen/X86/2011-12-28-vselecti8.ll b/test/CodeGen/X86/2011-12-28-vselecti8.ll
index dbc122a..fc1b83b 100644
--- a/test/CodeGen/X86/2011-12-28-vselecti8.ll
+++ b/test/CodeGen/X86/2011-12-28-vselecti8.ll
@@ -5,8 +5,10 @@ target triple = "x86_64-apple-darwin11.2.0"
 ; CHECK: @foo8
 ; CHECK: psll
-; CHECK: psraw
-; CHECK: pblendvb
+; CHECK-NOT: sra
+; CHECK: pandn
+; CHECK: pand
+; CHECK: or
 ; CHECK: ret
 define void @foo8(float* nocapture %RET) nounwind {
 allocas:
diff --git a/test/CodeGen/X86/sext-blend.ll b/test/CodeGen/X86/sext-blend.ll
new file mode 100644
index 0000000..b1f9573
--- /dev/null
+++ b/test/CodeGen/X86/sext-blend.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+
+; CHECK: foo
+define <4 x double> @foo(<4 x double> %x, <4 x double> %y) {
+  ; CHECK: cmpnlepd
+  ; CHECK: psllq
+  ; CHECK-NEXT: blendvpd
+  ; CHECK: psllq
+  ; CHECK-NEXT: blendvpd
+  ; CHECK: ret
+  %min_is_x = fcmp ult <4 x double> %x, %y
+  %min = select <4 x i1> %min_is_x, <4 x double> %x, <4 x double> %y
+  ret <4 x double> %min
+}
+
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 2f4317b..c6602d3 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -28,10 +28,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
-; CHECK: xorps
-; CHECK: andps
-; CHECK: andnps
-; CHECK: orps
+; CHECK: pxor
+; CHECK: and
+; CHECK: andn
+; CHECK: or
 ; CHECK: ret
 define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
@@ -44,10 +44,10 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
-; CHECK: xorps
-; CHECK: andps
-; CHECK: andnps
-; CHECK: orps
+; CHECK: xor
+; CHECK: and
+; CHECK: andn
+; CHECK: or
 ; CHECK: ret
 define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll
index 78604a0..0a71dd0 100644
--- a/test/CodeGen/X86/sse41-blend.ll
+++ b/test/CodeGen/X86/sse41-blend.ll
@@ -36,6 +36,7 @@ define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
 ;CHECK: vsel_double
+;CHECK-NOT: sra
 ;CHECK: blendvpd
 ;CHECK: ret
 define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) {
@@ -54,6 +55,7 @@ define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) {
 ;CHECK: vsel_i8
+;CHECK-NOT: sra
 ;CHECK: pblendvb
 ;CHECK: ret
 define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
@@ -65,6 +67,7 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
 ; CHECK: A
 define <2 x double> @A(<2 x double> %x, <2 x double> %y) {
   ; CHECK: cmplepd
+  ; CHECK-NOT: sra
   ; CHECK: blendvpd
   %max_is_x = fcmp oge <2 x double> %x, %y
   %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
@@ -74,6 +77,7 @@ define <2 x double> @A(<2 x double> %x, <2 x double> %y) {
 ; CHECK: B
 define <2 x double> @B(<2 x double> %x, <2 x double> %y) {
   ; CHECK: cmpnlepd
+  ; CHECK-NOT: sra
   ; CHECK: blendvpd
   %min_is_x = fcmp ult <2 x double> %x, %y
   %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
```