diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-05-13 08:35:03 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-05-13 08:35:03 +0000 |
commit | 411fc17b569bac71b8d0578174d027663202c203 (patch) | |
tree | bb8e0ed03e63e856988c905af714ffb7bd62205e | |
parent | 9ddb3032a8576650ce360ffef0bbed6caecc3824 (diff) | |
download | external_llvm-411fc17b569bac71b8d0578174d027663202c203.zip external_llvm-411fc17b569bac71b8d0578174d027663202c203.tar.gz external_llvm-411fc17b569bac71b8d0578174d027663202c203.tar.bz2 |
Instead of performing a vector load, a shuffle, and then an element extract, load the element directly from the address plus an offset:
pshufd $1, (%rdi), %xmm0
movd %xmm0, %eax
=>
movl 4(%rdi), %eax
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51026 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/CodeGen/SelectionDAG.h | 4 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 99 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 22 | ||||
-rw-r--r-- | lib/Target/X86/README-SSE.txt | 29 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 22 | ||||
-rw-r--r-- | test/CodeGen/X86/extractelement-from-arg.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/extractelement-load.ll | 9 | ||||
-rw-r--r-- | test/CodeGen/X86/sse-align-12.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_extract-sse4.ll | 15 |
9 files changed, 114 insertions, 96 deletions
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 7eef093..02f5cbc 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -607,6 +607,10 @@ public: /// isVerifiedDebugInfoDesc - Returns true if the specified SDOperand has /// been verified as a debug information descriptor. bool isVerifiedDebugInfoDesc(SDOperand Op) const; + + /// getShuffleScalarElt - Returns the scalar element that will make up the ith + /// element of the result of the vector shuffle. + SDOperand getShuffleScalarElt(const SDNode *N, unsigned Idx); private: void RemoveNodeFromCSEMaps(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 684b2f6..8fe6eb7 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4682,49 +4682,82 @@ SDOperand DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } SDOperand DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + + // Perform only after legalization to ensure build_vector / vector_shuffle + // optimizations have already been done. 
+ if (!AfterLegalize) return SDOperand(); + SDOperand InVec = N->getOperand(0); SDOperand EltNo = N->getOperand(1); - // (vextract (v4f32 s2v (f32 load $addr)), 0) -> (f32 load $addr) - // (vextract (v4i32 bc (v4f32 s2v (f32 load $addr))), 0) -> (i32 load $addr) if (isa<ConstantSDNode>(EltNo)) { unsigned Elt = cast<ConstantSDNode>(EltNo)->getValue(); bool NewLoad = false; - if (Elt == 0) { - MVT::ValueType VT = InVec.getValueType(); - MVT::ValueType EVT = MVT::getVectorElementType(VT); - MVT::ValueType LVT = EVT; - unsigned NumElts = MVT::getVectorNumElements(VT); - if (InVec.getOpcode() == ISD::BIT_CONVERT) { - MVT::ValueType BCVT = InVec.getOperand(0).getValueType(); - if (!MVT::isVector(BCVT) || - NumElts != MVT::getVectorNumElements(BCVT)) - return SDOperand(); + MVT::ValueType VT = InVec.getValueType(); + MVT::ValueType EVT = MVT::getVectorElementType(VT); + MVT::ValueType LVT = EVT; + if (InVec.getOpcode() == ISD::BIT_CONVERT) { + MVT::ValueType BCVT = InVec.getOperand(0).getValueType(); + if (!MVT::isVector(BCVT) + || (MVT::getSizeInBits(EVT) > + MVT::getSizeInBits(MVT::getVectorElementType(BCVT)))) + return SDOperand(); + InVec = InVec.getOperand(0); + EVT = MVT::getVectorElementType(BCVT); + NewLoad = true; + } + + LoadSDNode *LN0 = NULL; + if (ISD::isNormalLoad(InVec.Val)) + LN0 = cast<LoadSDNode>(InVec); + else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && + InVec.getOperand(0).getValueType() == EVT && + ISD::isNormalLoad(InVec.getOperand(0).Val)) { + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); + } else if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { + // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) + // => + // (load $addr+1*size) + unsigned Idx = cast<ConstantSDNode>(InVec.getOperand(2). + getOperand(Elt))->getValue(); + unsigned NumElems = InVec.getOperand(2).getNumOperands(); + InVec = (Idx < NumElems) ? 
InVec.getOperand(0) : InVec.getOperand(1); + if (InVec.getOpcode() == ISD::BIT_CONVERT) InVec = InVec.getOperand(0); - EVT = MVT::getVectorElementType(BCVT); - NewLoad = true; + if (ISD::isNormalLoad(InVec.Val)) { + LN0 = cast<LoadSDNode>(InVec); + Elt = (Idx < NumElems) ? Idx : Idx - NumElems; } - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && - InVec.getOperand(0).getValueType() == EVT && - ISD::isNormalLoad(InVec.getOperand(0).Val) && - InVec.getOperand(0).hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(InVec.getOperand(0)); - unsigned Align = LN0->getAlignment(); - if (NewLoad) { - // Check the resultant load doesn't need a higher alignment than the - // original load. - unsigned NewAlign = TLI.getTargetMachine().getTargetData()-> - getABITypeAlignment(MVT::getTypeForValueType(LVT)); - if (!TLI.isOperationLegal(ISD::LOAD, LVT) || NewAlign > Align) - return SDOperand(); - Align = NewAlign; - } + } + if (!LN0 || !LN0->hasOneUse()) + return SDOperand(); - return DAG.getLoad(LVT, LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), Align); - } + unsigned Align = LN0->getAlignment(); + if (NewLoad) { + // Check the resultant load doesn't need a higher alignment than the + // original load. 
+ unsigned NewAlign = TLI.getTargetMachine().getTargetData()-> + getABITypeAlignment(MVT::getTypeForValueType(LVT)); + if (!TLI.isOperationLegal(ISD::LOAD, LVT) || NewAlign > Align) + return SDOperand(); + Align = NewAlign; + } + + SDOperand NewPtr = LN0->getBasePtr(); + if (Elt) { + unsigned PtrOff = MVT::getSizeInBits(LVT) * Elt / 8; + MVT::ValueType PtrType = NewPtr.getValueType(); + if (TLI.isBigEndian()) + PtrOff = MVT::getSizeInBits(VT) / 8 - PtrOff; + NewPtr = DAG.getNode(ISD::ADD, PtrType, NewPtr, + DAG.getConstant(PtrOff, PtrType)); } + return DAG.getLoad(LVT, LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), Align); } return SDOperand(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f05f444..058e60f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1838,6 +1838,28 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDOperand Op) const { } +/// getShuffleScalarElt - Returns the scalar element that will make up the ith +/// element of the result of the vector shuffle. +SDOperand SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned Idx) { + MVT::ValueType VT = N->getValueType(0); + SDOperand PermMask = N->getOperand(2); + unsigned NumElems = PermMask.getNumOperands(); + SDOperand V = (Idx < NumElems) ? N->getOperand(0) : N->getOperand(1); + Idx %= NumElems; + if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return (Idx == 0) + ? V.getOperand(0) : getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); + } + if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { + SDOperand Elt = PermMask.getOperand(Idx); + if (Elt.getOpcode() == ISD::UNDEF) + return getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); + return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Elt)->getValue()); + } + return SDOperand(); +} + + /// getNode - Gets or creates the specified node. 
/// SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT) { diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index c78e13b..5c681e0 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -545,35 +545,6 @@ swizzle: //===---------------------------------------------------------------------===// -These functions should produce the same code: - -#include <emmintrin.h> - -typedef long long __m128i __attribute__ ((__vector_size__ (16))); - -int foo(__m128i* val) { - return __builtin_ia32_vec_ext_v4si(*val, 1); -} -int bar(__m128i* val) { - union vs { - __m128i *_v; - int* _s; - } v = {val}; - return v._s[1]; -} - -We currently produce (with -m64): - -_foo: - pshufd $1, (%rdi), %xmm0 - movd %xmm0, %eax - ret -_bar: - movl 4(%rdi), %eax - ret - -//===---------------------------------------------------------------------===// - We should materialize vector constants like "all ones" and "signbit" with code like: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 806b626..4cc3f27 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6182,26 +6182,6 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, } } -/// getShuffleScalarElt - Returns the scalar element that will make up the ith -/// element of the result of the vector shuffle. -static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { - MVT::ValueType VT = N->getValueType(0); - SDOperand PermMask = N->getOperand(2); - unsigned NumElems = PermMask.getNumOperands(); - SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); - i %= NumElems; - if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { - return (i == 0) - ? 
V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); - } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { - SDOperand Idx = PermMask.getOperand(i); - if (Idx.getOpcode() == ISD::UNDEF) - return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); - return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); - } - return SDOperand(); -} - /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. bool X86TargetLowering::isGAPlusOffset(SDNode *N, @@ -6240,7 +6220,7 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDOperand PermMask, } unsigned Index = cast<ConstantSDNode>(Idx)->getValue(); - SDOperand Elt = getShuffleScalarElt(N, Index, DAG); + SDOperand Elt = DAG.getShuffleScalarElt(N, Index); if (!Elt.Val || (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val))) return false; diff --git a/test/CodeGen/X86/extractelement-from-arg.ll b/test/CodeGen/X86/extractelement-from-arg.ll index d28f016..44704b6 100644 --- a/test/CodeGen/X86/extractelement-from-arg.ll +++ b/test/CodeGen/X86/extractelement-from-arg.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as %s -o - | llc -march=x86-64 +; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2 -define void @test(float* %R, <4 x float> %X) { +define void @test(float* %R, <4 x float> %X) nounwind { %tmp = extractelement <4 x float> %X, i32 3 store float %tmp, float* %R ret void diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll new file mode 100644 index 0000000..4850eba --- /dev/null +++ b/test/CodeGen/X86/extractelement-load.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as %s -o - | llc -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd +; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2 -mcpu=yonah | not grep movd + +define i32 @t(<2 x i64>* %val) nounwind { + %tmp2 = load <2 x i64>* %val, align 16 ; <<2 x i64>> [#uses=1] + %tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1] + 
%tmp4 = extractelement <4 x i32> %tmp3, i32 2 ; <i32> [#uses=1] + ret i32 %tmp4 +} diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll index 7ff6b1e..a501660 100644 --- a/test/CodeGen/X86/sse-align-12.ll +++ b/test/CodeGen/X86/sse-align-12.ll @@ -28,8 +28,7 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind { %s = insertelement <4 x float> %r, float %b, i32 3 ret <4 x float> %s } -define <2 x double> @c(<2 x double>* %y) -{ +define <2 x double> @c(<2 x double>* %y) nounwind { %x = load <2 x double>* %y, align 8 %a = extractelement <2 x double> %x, i32 0 %c = extractelement <2 x double> %x, i32 1 @@ -37,8 +36,7 @@ define <2 x double> @c(<2 x double>* %y) %r = insertelement <2 x double> %p, double %a, i32 1 ret <2 x double> %r } -define <2 x double> @d(<2 x double>* %y, <2 x double> %z) -{ +define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind { %x = load <2 x double>* %y, align 8 %a = extractelement <2 x double> %x, i32 1 %c = extractelement <2 x double> %z, i32 1 diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll index 1ef5e88..d6726be 100644 --- a/test/CodeGen/X86/vec_extract-sse4.ll +++ b/test/CodeGen/X86/vec_extract-sse4.ll @@ -1,29 +1,30 @@ ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f -; RUN: grep extractps %t | count 1 -; RUN: grep pextrd %t | count 2 -; RUN: grep pshufd %t | count 1 +; RUN: grep extractps %t | count 1 +; RUN: grep pextrd %t | count 1 +; RUN: not grep pshufd %t +; RUN: not grep movss %t -define void @t1(float* %R, <4 x float>* %P1) { +define void @t1(float* %R, <4 x float>* %P1) nounwind { %X = load <4 x float>* %P1 %tmp = extractelement <4 x float> %X, i32 3 store float %tmp, float* %R ret void } -define float @t2(<4 x float>* %P1) { +define float @t2(<4 x float>* %P1) nounwind { %X = load <4 x float>* %P1 %tmp = extractelement <4 x float> %X, i32 2 ret float %tmp } -define void @t3(i32* %R, <4 x i32>* %P1) { +define void @t3(i32* 
%R, <4 x i32>* %P1) nounwind { %X = load <4 x i32>* %P1 %tmp = extractelement <4 x i32> %X, i32 3 store i32 %tmp, i32* %R ret void } -define i32 @t4(<4 x i32>* %P1) { +define i32 @t4(<4 x i32>* %P1) nounwind { %X = load <4 x i32>* %P1 %tmp = extractelement <4 x i32> %X, i32 3 ret i32 %tmp |