Add a new DAGCombine optimization for BUILD_VECTOR.

If all of the inputs are zero- or any-extended, create a new, simpler BUILD_VECTOR (BV) which can be further optimized by the other BV optimizations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143297 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nadav Rotem <nadav.rotem@intel.com> 2011-10-29 21:23:04 +0000
committer: Nadav Rotem <nadav.rotem@intel.com> 2011-10-29 21:23:04 +0000
commit: b00418af67b36dcd7d70a268ebba3480c1011348 (patch)
tree: bf0c4b39d2d831774d5ec7b7a182a2c29fb3c3f9
parent: f86545ecfdb48a43e62ce7dfd312913b0a24240b (diff)
download: external_llvm-b00418af67b36dcd7d70a268ebba3480c1011348.zip
external_llvm-b00418af67b36dcd7d70a268ebba3480c1011348.tar.gz
external_llvm-b00418af67b36dcd7d70a268ebba3480c1011348.tar.bz2
4 files changed, 106 insertions, 5 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b1afbf2..d96ce75 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6936,7 +6936,90 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
 
 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   unsigned NumInScalars = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
   EVT VT = N->getValueType(0);
+  // Check to see if this is a BUILD_VECTOR of a bunch of values
+  // which come from any_extend or zero_extend nodes. If so, we can create
+  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+  // optimizations.
+  EVT SourceType = MVT::Other;
+  bool allExtend = true;
+  bool allAnyExt = true;
+  for (unsigned i = 0; i < NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+    // Ignore undef inputs.
+    if (In.getOpcode() == ISD::UNDEF) continue;
+
+    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
+    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+    // Abort non-extend incoming values.
+    if (!ZeroExt && !AnyExt) {
+      allExtend = false;
+      break;
+    }
+
+    // The input is a ZeroExt or AnyExt. Check the original type.
+    EVT InTy = In.getOperand(0).getValueType();
+
+    // Check that all of the widened source types are the same.
+    if (SourceType == MVT::Other)
+      SourceType = InTy;
+    else if (InTy != SourceType) {
+      // Multiple incoming types. Abort.
+      allExtend = false;
+      break;
+    }
+
+    // Check if all of the extends are ANY_EXTENDs.
+    allAnyExt &= AnyExt;
+  }
+
+  // Only perform the transformation if we are post type-legalization,
+  // all of the values are extends or undef,
+  // and there is at least one non-undef entry.
+  if (LegalTypes && allExtend && SourceType != MVT::Other) {
+    bool isLE = TLI.isLittleEndian();
+    EVT InScalarTy = SourceType.getScalarType();
+    EVT OutScalarTy = N->getValueType(0).getScalarType();
+    unsigned ElemRatio = OutScalarTy.getSizeInBits()/InScalarTy.getSizeInBits();
+    assert(ElemRatio > 1 && "Invalid element size ratio");
+    SDValue Filler = allAnyExt ? DAG.getUNDEF(InScalarTy):
+                                 DAG.getConstant(0, InScalarTy);
+
+    unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
+    SmallVector<SDValue,8> Ops(NewBVElems , Filler);
+
+    // Populate the new build_vector
+    for (unsigned i=0; i < N->getNumOperands(); ++i) {
+      SDValue Cast = N->getOperand(i);
+      assert(Cast.getOpcode() == ISD::ANY_EXTEND ||
+             Cast.getOpcode() == ISD::ZERO_EXTEND ||
+             Cast.getOpcode() == ISD::UNDEF && "Invalid cast opcode");
+      SDValue In;
+      if (Cast.getOpcode() == ISD::UNDEF)
+        In = DAG.getUNDEF(InScalarTy);
+      else
+        In = Cast->getOperand(0);
+      unsigned Index = isLE ? (i * ElemRatio) :
+                              (i * ElemRatio + (ElemRatio - 1));
+
+      assert(Index < Ops.size() && "Invalid index");
+      Ops[Index] = In;
+    }
+
+    // The type of the new BUILD_VECTOR node.
+    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), InScalarTy, NewBVElems);
+    assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
+           "Invalid vector size");
+
+    // Make the new BUILD_VECTOR.
+    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                                 VecVT, &Ops[0], Ops.size());
+
+    // Bitcast to the desired type.
+    return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+  }
 
   // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
   // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
index 025ab2e..63a7da8 100644
--- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
+++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 3
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 2
 
 define i64 @a(i32 %a, i32 %b) nounwind readnone {
 entry:
diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll
new file mode 100644
index 0000000..016e02c
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: ltstore
+;CHECK: pshufd
+;CHECK: pshufd
+;CHECK: ret
+define void @ltstore() {
+entry:
+  %in = load <4 x i32>* undef
+  %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  store <2 x i32> %j, <2 x i32>* undef
+  ret void
+}
+
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index e91a734..0608398 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -26,10 +26,12 @@ entry:
 
 define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
 entry:
-; CHECK: movl  36({{%rdi|%rcx}})
-; CHECK-NEXT: movl  48({{%rdi|%rcx}})
-; CHECK: punpcklqdq
-; CHECK: movq    %xmm0, ({{%rsi|%rdx}})
+; CHECK: t02
+; CHECK: movaps
+; CHECK: shufps
+; CHECK: pshufd
+; CHECK: movq
+; CHECK: ret
   %0 = bitcast <8 x i32>* %source to <4 x i32>*
   %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
   %tmp2 = load <4 x i32>* %arrayidx, align 16
author	Nadav Rotem <nadav.rotem@intel.com>	2011-10-29 21:23:04 +0000
committer	Nadav Rotem <nadav.rotem@intel.com>	2011-10-29 21:23:04 +0000
commit	b00418af67b36dcd7d70a268ebba3480c1011348 (patch)
tree	bf0c4b39d2d831774d5ec7b7a182a2c29fb3c3f9
parent	f86545ecfdb48a43e62ce7dfd312913b0a24240b (diff)
download	external_llvm-b00418af67b36dcd7d70a268ebba3480c1011348.zip external_llvm-b00418af67b36dcd7d70a268ebba3480c1011348.tar.gz external_llvm-b00418af67b36dcd7d70a268ebba3480c1011348.tar.bz2