Add AVX versions of blend vector operations and fix some issues noticed in Nadav's r139285 and r139287 commits.

1) Rename vsel.ll to a more descriptive name.
2) Change the order of BLEND operands to "Op1, Op2, Cond". This is necessary because PBLENDVB is already used in different places with this order, and it was being emitted incorrectly for vselect.
3) Add AVX patterns and tests for the same SSE41 instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139305 91177308-0d34-0410-b5e6-96231b3b80d8
author: Bruno Cardoso Lopes <bruno.cardoso@gmail.com> 2011-09-08 18:05:08 +0000
committer: Bruno Cardoso Lopes <bruno.cardoso@gmail.com> 2011-09-08 18:05:08 +0000
commit: 814c6ced85e76c0e0ed0ffdea0c95b2f655847bb (patch)
tree: 4ffa29953ba28e5c6f74c05f5c49c74f5bf9e633
parent: 7db2d3a504713fce68bb859996994126af823ed0 (diff)
download: external_llvm-814c6ced85e76c0e0ed0ffdea0c95b2f655847bb.zip
external_llvm-814c6ced85e76c0e0ed0ffdea0c95b2f655847bb.tar.gz
external_llvm-814c6ced85e76c0e0ed0ffdea0c95b2f655847bb.tar.bz2
5 files changed, 69 insertions, 15 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d74a872..cf90490 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8697,7 +8697,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue Op2 = Op.getOperand(2);
   DebugLoc DL = Op.getDebugLoc();
 
-  SDValue Ops[] = {Cond, Op1, Op2};
+  SDValue Ops[] = {Op1, Op2, Cond};
 
   assert(Op1.getValueType().isVector() && "Op1 must be a vector");
   assert(Op2.getValueType().isVector() && "Op2 must be a vector");
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 7ad9c87..c2db917 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -61,10 +61,10 @@ def X86psignd  : SDNode<"X86ISD::PSIGND",
 def X86pblendvb : SDNode<"X86ISD::PBLENDVB",
                  SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
                                       SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
-def X86blendvpd : SDNode<"X86ISD::BLENDVPD", 
+def X86blendvpd : SDNode<"X86ISD::BLENDVPD",
                   SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
                                        SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
-def X86blendvps : SDNode<"X86ISD::BLENDVPS", 
+def X86blendvps : SDNode<"X86ISD::BLENDVPS",
                  SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
                                       SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
 def X86pextrb  : SDNode<"X86ISD::PEXTRB",
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 9148c76..6bcba72 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5853,9 +5853,14 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
 defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
                                          memopv32i8, int_x86_avx_blendv_ps_256>;
 
-def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$src3),
-          (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$src3)>,
-          Requires<[HasAVX]>;
+let Predicates = [HasAVX] in {
+  def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$mask),
+            (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+  def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, VR128:$mask),
+            (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+  def : Pat<(X86blendvps VR128:$src1, VR128:$src2, VR128:$mask),
+            (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
+}
 
 /// SS41I_ternary_int - SSE 4.1 ternary operator
 let Uses = [XMM0], Constraints = "$src1 = $dst" in {
@@ -5877,16 +5882,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
   }
 }
 
-defm BLENDVPD     : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
-defm BLENDVPS     : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB     : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
 
-def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
-          (PBLENDVBrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
-def : Pat<(X86blendvpd  XMM0, VR128:$src1, VR128:$src2),
-          (BLENDVPDrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
-def : Pat<(X86blendvps  XMM0, VR128:$src1, VR128:$src2),
-          (BLENDVPSrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
+let Predicates = [HasSSE41] in {
+  def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
+            (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
+  def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, XMM0),
+            (BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
+  def : Pat<(X86blendvps VR128:$src1, VR128:$src2, XMM0),
+            (BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
+}
 
 let Predicates = [HasAVX] in
 def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
new file mode 100644
index 0000000..68289ad
--- /dev/null
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mattr=+avx -march=x86 | FileCheck %s
+
+;CHECK: vsel_float
+;CHECK: vblendvps
+;CHECK: ret
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+  ret <4 x float> %vsel
+}
+
+
+;CHECK: vsel_i32
+;CHECK: vblendvps
+;CHECK: ret
+define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %v1, <4 x i32> %v2
+  ret <4 x i32> %vsel
+}
+
+
+;CHECK: vsel_double
+;CHECK: vblendvpd
+;CHECK: ret
+define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
+  ret <2 x double> %vsel
+}
+
+
+;CHECK: vsel_i64
+;CHECK: vblendvpd
+;CHECK: ret
+define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
+  ret <2 x i64> %vsel
+}
+
+
+;CHECK: vsel_i8
+;CHECK: vpblendvb
+;CHECK: ret
+define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
+  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
+  ret <16 x i8> %vsel
+}
+
+
diff --git a/test/CodeGen/X86/vsel.ll b/test/CodeGen/X86/sse41-blend.ll
index 3c854ac..3c854ac 100644
--- a/test/CodeGen/X86/vsel.ll
+++ b/test/CodeGen/X86/sse41-blend.ll
author	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>	2011-09-08 18:05:08 +0000
committer	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>	2011-09-08 18:05:08 +0000
commit	814c6ced85e76c0e0ed0ffdea0c95b2f655847bb (patch)
tree	4ffa29953ba28e5c6f74c05f5c49c74f5bf9e633
parent	7db2d3a504713fce68bb859996994126af823ed0 (diff)
download	external_llvm-814c6ced85e76c0e0ed0ffdea0c95b2f655847bb.zip external_llvm-814c6ced85e76c0e0ed0ffdea0c95b2f655847bb.tar.gz external_llvm-814c6ced85e76c0e0ed0ffdea0c95b2f655847bb.tar.bz2