diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-05-12 23:04:07 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-05-12 23:04:07 +0000 |
commit | 9bfa03c6fd8e02b738e0077fd1af7b18eeeeb4c1 (patch) | |
tree | 02ded03a6c36779787a2a0d20a980084bcde378b /lib | |
parent | 38eb9f9ae6e113a92c60b964b9d2a7f0625665bd (diff) | |
download | external_llvm-9bfa03c6fd8e02b738e0077fd1af7b18eeeeb4c1.zip external_llvm-9bfa03c6fd8e02b738e0077fd1af7b18eeeeb4c1.tar.gz external_llvm-9bfa03c6fd8e02b738e0077fd1af7b18eeeeb4c1.tar.bz2 |
Transform bitconvert(build_pair(load a, load b)) into a single load if the two load locations are at the right offset from each other (i.e. the loads are consecutive in memory).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51008 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 49 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/X86/README-SSE.txt | 54 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 24 |
4 files changed, 55 insertions, 74 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 28f32d3..684b2f6 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -177,6 +177,7 @@ namespace { SDOperand visitSIGN_EXTEND_INREG(SDNode *N); SDOperand visitTRUNCATE(SDNode *N); SDOperand visitBIT_CONVERT(SDNode *N); + SDOperand visitBUILD_PAIR(SDNode *N); SDOperand visitFADD(SDNode *N); SDOperand visitFSUB(SDNode *N); SDOperand visitFMUL(SDNode *N); @@ -217,6 +218,7 @@ namespace { ISD::CondCode Cond, bool foldBooleans = true); SDOperand SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); + SDOperand CombineConsecutiveLoads(SDNode *N, MVT::ValueType VT); SDOperand ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT::ValueType); SDOperand BuildSDIV(SDNode *N); SDOperand BuildUDIV(SDNode *N); @@ -710,6 +712,7 @@ SDOperand DAGCombiner::visit(SDNode *N) { case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); case ISD::FSUB: return visitFSUB(N); case ISD::FMUL: return visitFMUL(N); @@ -3356,6 +3359,40 @@ SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) { return ReduceLoadWidth(N); } +static SDNode *getBuildPairElt(SDNode *N, unsigned i) { + SDOperand Elt = N->getOperand(i); + if (Elt.getOpcode() != ISD::MERGE_VALUES) + return Elt.Val; + return Elt.getOperand(Elt.ResNo).Val; +} + +/// CombineConsecutiveLoads - build_pair (load, load) -> load +/// if load locations are consecutive. 
+SDOperand DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT::ValueType VT) { + assert(N->getOpcode() == ISD::BUILD_PAIR); + + SDNode *LD1 = getBuildPairElt(N, 0); + if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + return SDOperand(); + MVT::ValueType LD1VT = LD1->getValueType(0); + SDNode *LD2 = getBuildPairElt(N, 1); + const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + if (ISD::isNON_EXTLoad(LD2) && + LD2->hasOneUse() && + TLI.isConsecutiveLoad(LD2, LD1, MVT::getSizeInBits(LD1VT)/8, 1, MFI)) { + LoadSDNode *LD = cast<LoadSDNode>(LD1); + unsigned Align = LD->getAlignment(); + unsigned NewAlign = TLI.getTargetMachine().getTargetData()-> + getABITypeAlignment(MVT::getTypeForValueType(VT)); + if ((!AfterLegalize || TLI.isTypeLegal(VT)) && + TLI.isOperationLegal(ISD::LOAD, VT) && NewAlign <= Align) + return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), Align); + } + return SDOperand(); +} + SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDOperand N0 = N->getOperand(0); MVT::ValueType VT = N->getValueType(0); @@ -3463,10 +3500,22 @@ SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) { return DAG.getNode(ISD::OR, VT, X, Cst); } + + // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. + if (N0.getOpcode() == ISD::BUILD_PAIR) { + SDOperand CombineLD = CombineConsecutiveLoads(N0.Val, VT); + if (CombineLD.Val) + return CombineLD; + } return SDOperand(); } +SDOperand DAGCombiner::visitBUILD_PAIR(SDNode *N) { + MVT::ValueType VT = N->getValueType(0); + return CombineConsecutiveLoads(N, VT); +} + /// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. 
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e57813d..408a5b2 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1514,7 +1514,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, /// location that the 'Base' load is loading from. bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist, - MachineFrameInfo *MFI) const { + const MachineFrameInfo *MFI) const { if (LD->getOperand(0).Val != Base->getOperand(0).Val) return false; MVT::ValueType VT = LD->getValueType(0); diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 1a5d904..34b949a 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -428,60 +428,6 @@ entry: //===---------------------------------------------------------------------===// -Consider (PR2108): - -#include <xmmintrin.h> -__m128i doload64(unsigned long long x) { return _mm_loadl_epi64(&x);} -__m128i doload64_2(unsigned long long *x) { return _mm_loadl_epi64(x);} - -These are very similar routines, but we generate significantly worse code for -the first one on x86-32: - -_doload64: - subl $12, %esp - movl 20(%esp), %eax - movl %eax, 4(%esp) - movl 16(%esp), %eax - movl %eax, (%esp) - movsd (%esp), %xmm0 - addl $12, %esp - ret -_doload64_2: - movl 4(%esp), %eax - movsd (%eax), %xmm0 - ret - -The problem is that the argument lowering logic splits the i64 argument into -2x i32 loads early, the f64 insert doesn't match. Here's a reduced testcase: - -define fastcc double @doload64(i64 %x) nounwind { -entry: - %tmp717 = bitcast i64 %x to double ; <double> [#uses=1] - ret double %tmp717 -} - -compiles to: - -_doload64: - subl $12, %esp - movl 20(%esp), %eax - movl %eax, 4(%esp) - movl 16(%esp), %eax - movl %eax, (%esp) - movsd (%esp), %xmm0 - addl $12, %esp - ret - -instead of movsd from the stack. 
This is actually not too bad to implement. The -best way to do this is to implement a dag combine that turns -bitconvert(build_pair(load a, load b)) into one load of the right type. The -only trick to this is writing the predicate that determines that a/b are at the -right offset from each other. For the enterprising hacker, InferAlignment is a -helpful place to start poking if interested. - - -//===---------------------------------------------------------------------===// - __m128d test1( __m128d A, __m128d B) { return _mm_shuffle_pd(A, B, 0x3); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5d50e36..806b626 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6285,13 +6285,7 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, LD->getAlignment()); } -static SDNode *getBuildPairElt(SDNode *N, unsigned i) { - SDOperand Elt = N->getOperand(i); - if (Elt.getOpcode() != ISD::MERGE_VALUES) - return Elt.Val; - return Elt.getOperand(Elt.ResNo).Val; -} - +/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd. static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget, const TargetLowering &TLI) { @@ -6312,25 +6306,17 @@ static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, return SDOperand(); // Value must be a load. 
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDNode *Base = N->getOperand(0).Val; if (!isa<LoadSDNode>(Base)) { - if (Base->getOpcode() == ISD::BIT_CONVERT) - Base = Base->getOperand(0).Val; - if (Base->getOpcode() != ISD::BUILD_PAIR) + if (Base->getOpcode() != ISD::BIT_CONVERT) return SDOperand(); - SDNode *Pair = Base; - Base = getBuildPairElt(Pair, 0); - if (!ISD::isNON_EXTLoad(Base)) - return SDOperand(); - SDNode *NextLD = getBuildPairElt(Pair, 1); - if (!ISD::isNON_EXTLoad(NextLD) || - !TLI.isConsecutiveLoad(NextLD, Base, 4/*32 bits*/, 1, MFI)) + Base = Base->getOperand(0).Val; + if (!isa<LoadSDNode>(Base)) return SDOperand(); } - LoadSDNode *LD = cast<LoadSDNode>(Base); // Transform it into VZEXT_LOAD addr. + LoadSDNode *LD = cast<LoadSDNode>(Base); return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr()); } |