diff options
author | Craig Topper <craig.topper@gmail.com> | 2012-03-20 07:17:59 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2012-03-20 07:17:59 +0000 |
commit | 89f4e6639d174d20f8be59af4895d7d851e4f624 (patch) | |
tree | e63c740f1c4c20931a1bdd628fb17af5559aa948 /lib/Target/X86 | |
parent | a1ffc681ed7372bd371c44a6e186291b6416fe47 (diff) | |
download | external_llvm-89f4e6639d174d20f8be59af4895d7d851e4f624.zip external_llvm-89f4e6639d174d20f8be59af4895d7d851e4f624.tar.gz external_llvm-89f4e6639d174d20f8be59af4895d7d851e4f624.tar.bz2 |
Remove code that prevented lowering shuffles if they are used by a load and themselves used by an extract_vector_elt. This was done to allow the DAG combiner to collapse to a single element load. Unfortunately, sometimes the extract_vector_elt would disappear before DAG combine could do the transformation, leaving a vector_shuffle that isel couldn't handle. New code lets the shuffle be converted to a target-specific node, but then adds a combine routine that can convert target-specific nodes back to vector_shuffles if the folding criteria are met.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153080 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 203 |
1 files changed, 111 insertions, 92 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cad9a26..e32d6b9 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4346,11 +4346,13 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// getTargetShuffleMask - Calculates the shuffle mask corresponding to the /// target specific opcode. Returns true if the Mask could be calculated. +/// Sets IsUnary to true if only uses one source. static bool getTargetShuffleMask(SDNode *N, EVT VT, - SmallVectorImpl<int> &Mask) { + SmallVectorImpl<int> &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); SDValue ImmN; + IsUnary = false; switch(N->getOpcode()) { case X86ISD::SHUFP: ImmN = N->getOperand(N->getNumOperands()-1); @@ -4372,14 +4374,17 @@ static bool getTargetShuffleMask(SDNode *N, EVT VT, case X86ISD::VPERMILP: ImmN = N->getOperand(N->getNumOperands()-1); DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = true; break; case X86ISD::PSHUFHW: ImmN = N->getOperand(N->getNumOperands()-1); DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = true; break; case X86ISD::PSHUFLW: ImmN = N->getOperand(N->getNumOperands()-1); DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = true; break; case X86ISD::MOVSS: case X86ISD::MOVSD: { @@ -4440,8 +4445,9 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, unsigned NumElems = VT.getVectorNumElements(); SmallVector<int, 16> ShuffleMask; SDValue ImmN; + bool IsUnary; - if (!getTargetShuffleMask(N, VT, ShuffleMask)) + if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary)) return SDValue(); Index = ShuffleMask[Index]; @@ -6093,88 +6099,6 @@ static bool RelaxedMayFoldVectorLoad(SDValue V) { return false; } -/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by -/// a vector extract, and if both can be later optimized into a single load. 
-/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked -/// here because otherwise a target specific shuffle node is going to be -/// emitted for this shuffle, and the optimization not done. -/// FIXME: This is probably not the best approach, but fix the problem -/// until the right path is decided. -static -bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG, - const TargetLowering &TLI) { - EVT VT = V.getValueType(); - ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V); - - // Be sure that the vector shuffle is present in a pattern like this: - // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr) - if (!V.hasOneUse()) - return false; - - SDNode *N = *V.getNode()->use_begin(); - if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return false; - - SDValue EltNo = N->getOperand(1); - if (!isa<ConstantSDNode>(EltNo)) - return false; - - // If the bit convert changed the number of elements, it is unsafe - // to examine the mask. - bool HasShuffleIntoBitcast = false; - if (V.getOpcode() == ISD::BITCAST) { - EVT SrcVT = V.getOperand(0).getValueType(); - if (SrcVT.getVectorNumElements() != VT.getVectorNumElements()) - return false; - V = V.getOperand(0); - HasShuffleIntoBitcast = true; - } - - // Select the input vector, guarding against out of range extract vector. - unsigned NumElems = VT.getVectorNumElements(); - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt); - V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1); - - // If we are accessing the upper part of a YMM register - // then the EXTRACT_VECTOR_ELT is likely to be legalized to a sequence of - // EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which are not detected at this point - // because the legalization of N did not happen yet. 
- if (Idx >= (int)NumElems/2 && VT.getSizeInBits() == 256) - return false; - - // Skip one more bit_convert if necessary - if (V.getOpcode() == ISD::BITCAST) { - if (!V.hasOneUse()) - return false; - V = V.getOperand(0); - } - - if (!ISD::isNormalLoad(V.getNode())) - return false; - - // Is the original load suitable? - LoadSDNode *LN0 = cast<LoadSDNode>(V); - - if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) - return false; - - if (!HasShuffleIntoBitcast) - return true; - - // If there's a bitcast before the shuffle, check if the load type and - // alignment is valid. - unsigned Align = LN0->getAlignment(); - unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment( - VT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) - return false; - - return true; -} - static SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) { EVT VT = Op.getValueType(); @@ -6295,12 +6219,6 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, if (SVOp->isSplat()) { unsigned NumElem = VT.getVectorNumElements(); int Size = VT.getSizeInBits(); - // Special case, this is the only place now where it's allowed to return - // a vector_shuffle operation without using a target specific node, because - // *hopefully* it will be optimized away by the dag combiner. FIXME: should - // this be moved to DAGCombine instead? - if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI)) - return Op; // Use vbroadcast whenever the splat comes from a foldable load SDValue LD = isVectorBroadcast(Op, Subtarget); @@ -13018,11 +12936,109 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target +/// specific shuffle of a load can be folded into a single element load. 
+/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but +/// shuffles have been customed lowered so we need to handle those here. +static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue InVec = N->getOperand(0); + SDValue EltNo = N->getOperand(1); + + if (!isa<ConstantSDNode>(EltNo)) + return SDValue(); + + EVT VT = InVec.getValueType(); + + bool HasShuffleIntoBitcast = false; + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); + if (BCVT.getVectorNumElements() != VT.getVectorNumElements()) + return SDValue(); + InVec = InVec.getOperand(0); + HasShuffleIntoBitcast = true; + } + + if (!isTargetShuffle(InVec.getOpcode())) + return SDValue(); + + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + SmallVector<int, 16> ShuffleMask; + bool UnaryShuffle; + if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle)) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt]; + SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0) + : InVec.getOperand(1); + + // If inputs to shuffle are the same for both ops, then allow 2 uses + unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1; + + if (LdNode.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. 
+ if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0)) + return SDValue(); + + AllowedUses = 1; // only allow 1 load use if we have a bitcast + LdNode = LdNode.getOperand(0); + } + + if (!ISD::isNormalLoad(LdNode.getNode())) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(LdNode); + + if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) + return SDValue(); + + if (HasShuffleIntoBitcast) { + // If there's a bitcast before the shuffle, check if the load type and + // alignment is valid. + unsigned Align = LN0->getAlignment(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned NewAlign = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) + return SDValue(); + } + + // All checks match so transform back to vector_shuffle so that DAG combiner + // can finish the job + DebugLoc dl = N->getDebugLoc(); + + // Create shuffle node taking into account the case that its a unary shuffle + SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1); + Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl, + InVec.getOperand(0), Shuffle, + &ShuffleMask[0]); + Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, + EltNo); +} + /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index /// generation and convert it from being a bunch of shuffles and extracts /// to a simple store and scalar loads to extract the elements. 
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + TargetLowering::DAGCombinerInfo &DCI) { + SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI); + if (NewOp.getNode()) + return NewOp; + SDValue InputVector = N->getOperand(0); // Only operate on vectors of 4 elements, where the alternative shuffling @@ -13083,6 +13099,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, unsigned EltSize = InputVector.getValueType().getVectorElementType().getSizeInBits()/8; uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy()); SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -13106,6 +13123,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + + DebugLoc DL = N->getDebugLoc(); SDValue Cond = N->getOperand(0); // Get the LHS/RHS of the select. @@ -14910,7 +14929,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case ISD::EXTRACT_VECTOR_ELT: - return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); + return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI); case ISD::VSELECT: case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); |