diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-07-22 00:15:07 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-07-22 00:15:07 +0000 |
commit | 6683efb4cdb12785d19100e69f597156e434b9f4 (patch) | |
tree | 8c03db55bfd47406fd825d674f26fe1000f7615a | |
parent | 08b076cc968be1c3d1f62f484e3620fca06877c1 (diff) | |
download | external_llvm-6683efb4cdb12785d19100e69f597156e434b9f4.zip external_llvm-6683efb4cdb12785d19100e69f597156e434b9f4.tar.gz external_llvm-6683efb4cdb12785d19100e69f597156e434b9f4.tar.bz2 |
- Inspected an AVX code block added by someone in early Feb. This was never used
and was actually very wrong; fix it and make it simpler. Also remove the
ConcatVectors function, which is unused now.
- Fix an introduction of useless nodes in r126664 and r126264. The
VUNPCKL* nodes should never be introduced because we don't want duplicate
nodes for 128-bit AVX and non-AVX modes; the actual instruction
difference only exists during isel, not for target-specific DAG
nodes. We only introduce V* target nodes when there is no 128-bit
version already there.
- Fix a fragile test and make it more useful.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135729 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 99 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/SIMD/notvunpcklpd.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/SIMD/notvunpcklps.ll | 2 |
4 files changed, 27 insertions, 78 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 55d5680..c46ccf5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -71,9 +71,6 @@ static SDValue Extract128BitVector(SDValue Vec, SelectionDAG &DAG, DebugLoc dl); -static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG); - - /// Generate a DAG to grab 128-bits from a vector > 128 bits. This /// sets things up to match to an AVX VEXTRACTF128 instruction or a /// simple subregister reference. Idx is an index in the 128 bits we @@ -151,34 +148,6 @@ static SDValue Insert128BitVector(SDValue Result, return SDValue(); } -/// Given two vectors, concat them. -static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) { - DebugLoc dl = Lower.getDebugLoc(); - - assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!"); - - EVT VT = EVT::getVectorVT(*DAG.getContext(), - Lower.getValueType().getVectorElementType(), - Lower.getValueType().getVectorNumElements() * 2); - - // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors). - assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!"); - - // Insert the upper subvector. - SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper, - DAG.getConstant( - // This is half the length of the result - // vector. Start inserting the upper 128 - // bits here. - Lower.getValueType().getVectorNumElements(), - MVT::i32), - DAG, dl); - - // Insert the lower subvector. 
- Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl); - return Vec; -} - static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); bool is64Bit = Subtarget->is64Bit(); @@ -2734,8 +2703,6 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: @@ -2807,8 +2774,6 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: @@ -4111,8 +4076,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: DecodeUNPCKLPMask(VT, ShuffleMask); @@ -4545,31 +4508,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT ExtVT = VT.getVectorElementType(); - unsigned NumElems = Op.getNumOperands(); - // For AVX-length vectors, build the individual 128-bit pieces and - // use shuffles to put them in place. - if (VT.getSizeInBits() > 256 && - Subtarget->hasAVX() && - !ISD::isBuildVectorAllZeros(Op.getNode())) { - SmallVector<SDValue, 8> V; - V.resize(NumElems); - for (unsigned i = 0; i < NumElems; ++i) { - V[i] = Op.getOperand(i); - } - - EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); - - // Build the lower subvector. - SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2); - // Build the upper subvector. 
- SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2], - NumElems/2); - - return ConcatVectors(Lower, Upper, DAG); - } - // All zero's: // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX) // All one's: @@ -4731,6 +4671,27 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (IsAllConstants) return SDValue(); + // For AVX-length vectors, build the individual 128-bit pieces and use + // shuffles to put them in place. + if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) { + SmallVector<SDValue, 32> V; + for (unsigned i = 0; i < NumElems; ++i) + V.push_back(Op.getOperand(i)); + + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); + + // Build both the lower and upper subvector. + SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2); + SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2], + NumElems/2); + + // Recreate the wider vector with the lower and upper part. + SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper, + DAG.getConstant(NumElems/2, MVT::i32), DAG, dl); + return Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), + DAG, dl); + } + // Let legalizer expand 2-wide build_vectors. if (EVTBits == 64) { if (NumNonZero == 1) { @@ -5742,10 +5703,8 @@ static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; - case MVT::v4f32: - return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS; - case MVT::v2f64: - return Subtarget->hasAVX() ? 
X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; + case MVT::v4f32: return X86ISD::UNPCKLPS; + case MVT::v2f64: return X86ISD::UNPCKLPD; case MVT::v8f32: return X86ISD::VUNPCKLPSY; case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; @@ -6053,11 +6012,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { - if (VT == MVT::v2f64) { - X86ISD::NodeType Opcode = - getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; - return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG); - } + if (VT == MVT::v2f64) + return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); if (VT == MVT::v2i64) return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); } @@ -9725,9 +9681,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; - case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS"; - case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD"; - case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; @@ -12588,8 +12541,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 298d451..e9cbf80 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -257,8 +257,6 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, - VUNPCKLPS, - VUNPCKLPD, VUNPCKLPSY, VUNPCKLPDY, UNPCKHPS, diff --git 
a/test/CodeGen/X86/SIMD/notvunpcklpd.ll b/test/CodeGen/X86/SIMD/notvunpcklpd.ll index 3afc2f2..3817ee7 100644 --- a/test/CodeGen/X86/SIMD/notvunpcklpd.ll +++ b/test/CodeGen/X86/SIMD/notvunpcklpd.ll @@ -13,7 +13,7 @@ entry: %r3 = load <4 x double>* %r, align 8 %r4 = load <4 x double>* %rb, align 8 %r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x double>> [#uses=1] -; CHECK-NOT: vunpcklpd +; CHECK-NOT: vunpcklpd %ymm %r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1 store <4 x double> %r11, <4 x double>* %r12, align 4 ret void diff --git a/test/CodeGen/X86/SIMD/notvunpcklps.ll b/test/CodeGen/X86/SIMD/notvunpcklps.ll index 19daa3e..e3b115f 100644 --- a/test/CodeGen/X86/SIMD/notvunpcklps.ll +++ b/test/CodeGen/X86/SIMD/notvunpcklps.ll @@ -13,7 +13,7 @@ enmtry: %r3 = load <8 x float>* %r, align 8 %r4 = load <8 x float>* %rb, align 8 %r8 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x float>> [#uses=1] -; CHECK-NOT: vunpcklps +; CHECK-NOT: vunpcklps %ymm %r9 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 0 store <8 x float> %r8, <8 x float>* %r9, align 4 ret void |