aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/X86/avx-blend.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/avx-blend.ll')
-rw-r--r--test/CodeGen/X86/avx-blend.ll59
1 files changed, 52 insertions, 7 deletions
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
index 5fcd5ff..e21c7a0 100644
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -3,7 +3,16 @@
; AVX128 tests:
;CHECK-LABEL: vsel_float:
-;CHECK: vblendvps
+; select mask is <i1 true, i1 false, i1 true, i1 false>.
+; Big endian representation is 0101 = 5.
+; '1' means takes the first argument, '0' means takes the second argument.
+; This is the opposite of the intel syntax, thus we expect
+; the inverted mask: 1010 = 10.
+; According to the ABI:
+; v1 is in xmm0 => first argument is xmm0.
+; v2 is in xmm1 => second argument is xmm1.
+; result is in xmm0 => destination argument.
+;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
@@ -12,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
;CHECK-LABEL: vsel_i32:
-;CHECK: vblendvps
+;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
;CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
@@ -52,7 +61,13 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
;CHECK-LABEL: vsel_float8:
;CHECK-NOT: vinsertf128
-;CHECK: vblendvps
+; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
+; which translates into the boolean mask (big endian representation):
+; 00010001 = 17.
+; '1' means takes the first argument, '0' means takes the second argument.
+; This is the opposite of the intel syntax, thus we expect
+; the inverted mask: 11101110 = 238.
+;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
;CHECK: ret
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
@@ -61,7 +76,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
;CHECK-LABEL: vsel_i328:
;CHECK-NOT: vinsertf128
-;CHECK: vblendvps
+;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
;CHECK-NEXT: ret
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
@@ -69,7 +84,15 @@ define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
}
;CHECK-LABEL: vsel_double8:
-;CHECK: vblendvpd
+; select mask is 2x: 0001 => intel mask: ~0001 = 14
+; ABI:
+; v1 is in ymm0 and ymm1.
+; v2 is in ymm2 and ymm3.
+; result is in ymm0 and ymm1.
+; Compute the low part: res.low = blend v1.low, v2.low, blendmask
+;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
+; Compute the high part.
+;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
;CHECK: ret
define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
@@ -77,7 +100,8 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
}
;CHECK-LABEL: vsel_i648:
-;CHECK: vblendvpd
+;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
+;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
;CHECK: ret
define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
@@ -86,7 +110,7 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
;CHECK-LABEL: vsel_double4:
;CHECK-NOT: vinsertf128
-;CHECK: vblendvpd
+;CHECK: vshufpd $10
;CHECK-NEXT: ret
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
@@ -112,4 +136,25 @@ define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %min
}
+; If we can figure out a blend has a constant mask, we should emit the
+; blend instruction with an immediate mask
+define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: constant_blendvpd_avx:
+; CHECK-NOT: mov
+; CHECK: vblendpd
+; CHECK: ret
+ %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
+ ret <4 x double> %1
+}
+
+define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: constant_blendvps_avx:
+; CHECK-NOT: mov
+; CHECK: vblendps
+; CHECK: ret
+ %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
+ ret <8 x float> %1
+}
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)