diff options
author | Nadav Rotem <nadav.rotem@intel.com> | 2011-10-29 21:23:04 +0000 |
---|---|---|
committer | Nadav Rotem <nadav.rotem@intel.com> | 2011-10-29 21:23:04 +0000 |
commit | b00418af67b36dcd7d70a268ebba3480c1011348 (patch) | |
tree | bf0c4b39d2d831774d5ec7b7a182a2c29fb3c3f9 | |
parent | f86545ecfdb48a43e62ce7dfd312913b0a24240b (diff) | |
download | external_llvm-b00418af67b36dcd7d70a268ebba3480c1011348.zip external_llvm-b00418af67b36dcd7d70a268ebba3480c1011348.tar.gz external_llvm-b00418af67b36dcd7d70a268ebba3480c1011348.tar.bz2 |
Add a new DAGCombine optimization for BUILD_VECTOR.
If all of the inputs are zero_extended or any_extended (or undef), create a new,
simpler BUILD_VECTOR (BV) of the narrower source type, which can be further optimized by other BV optimizations.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143297 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 83 | ||||
-rw-r--r-- | test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/2011-10-27-tstore.ll | 16 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_shuffle-37.ll | 10 |
4 files changed, 106 insertions, 5 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b1afbf2..d96ce75 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6936,7 +6936,90 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + // Check to see if this is a BUILD_VECTOR of a bunch of values + // which come from any_extend or zero_extend nodes. If so, we can create + // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR + // optimizations. + EVT SourceType = MVT::Other; + bool allExtend = true; + bool allAnyExt = true; + for (unsigned i = 0; i < NumInScalars; ++i) { + SDValue In = N->getOperand(i); + // Ignore undef inputs. + if (In.getOpcode() == ISD::UNDEF) continue; + + bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; + bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; + + // Abort non-extend incoming values. + if (!ZeroExt && !AnyExt) { + allExtend = false; + break; + } + + // The input is a ZeroExt or AnyExt. Check the original type. + EVT InTy = In.getOperand(0).getValueType(); + + // Check that all of the widened source types are the same. + if (SourceType == MVT::Other) + SourceType = InTy; + else if (InTy != SourceType) { + // Multiple income types. Abort. + allExtend = false; + break; + } + + // Check if all of the extends are ANY_EXTENDs. + allAnyExt &= AnyExt; + } + + // And we are post type-legalization, + // If all of the values are Ext or undef, + // We have a non undef entry. 
+ if (LegalTypes && allExtend && SourceType != MVT::Other) { + bool isLE = TLI.isLittleEndian(); + EVT InScalarTy = SourceType.getScalarType(); + EVT OutScalarTy = N->getValueType(0).getScalarType(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/InScalarTy.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = allAnyExt ? DAG.getUNDEF(InScalarTy): + DAG.getConstant(0, InScalarTy); + + unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); + SmallVector<SDValue,8> Ops(NewBVElems , Filler); + + // Populate the new build_vector + for (unsigned i=0; i < N->getNumOperands(); ++i) { + SDValue Cast = N->getOperand(i); + assert(Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(InScalarTy); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), InScalarTy, NewBVElems); + assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && + "Invalid vector size"); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VecVT, &Ops[0], Ops.size()); + + // Bitcast to the desired type. + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. 
If so, and if the EXTRACT_VECTOR_ELT vector inputs come from diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll index 025ab2e..63a7da8 100644 --- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll +++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 3 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 2 define i64 @a(i32 %a, i32 %b) nounwind readnone { entry: diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll new file mode 100644 index 0000000..016e02c --- /dev/null +++ b/test/CodeGen/X86/2011-10-27-tstore.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +;CHECK: ltstore +;CHECK: pshufd +;CHECK: pshufd +;CHECK: ret +define void @ltstore() { +entry: + %in = load <4 x i32>* undef + %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + store <2 x i32> %j, <2 x i32>* undef + ret void +} + diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll index e91a734..0608398 100644 --- a/test/CodeGen/X86/vec_shuffle-37.ll +++ b/test/CodeGen/X86/vec_shuffle-37.ll @@ -26,10 +26,12 @@ entry: define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline { entry: -; CHECK: movl 36({{%rdi|%rcx}}) -; CHECK-NEXT: movl 48({{%rdi|%rcx}}) -; CHECK: punpcklqdq -; CHECK: movq %xmm0, ({{%rsi|%rdx}}) +; CHECK: t02 +; CHECK: movaps +; CHECK: shufps +; CHECK: pshufd +; CHECK: movq +; CHECK: ret %0 = bitcast <8 x i32>* %source to <4 x i32>* %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3 %tmp2 = load <4 x i32>* %arrayidx, align 16 |