aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2008-05-12 23:04:07 +0000
committerEvan Cheng <evan.cheng@apple.com>2008-05-12 23:04:07 +0000
commit9bfa03c6fd8e02b738e0077fd1af7b18eeeeb4c1 (patch)
tree02ded03a6c36779787a2a0d20a980084bcde378b /lib
parent38eb9f9ae6e113a92c60b964b9d2a7f0625665bd (diff)
downloadexternal_llvm-9bfa03c6fd8e02b738e0077fd1af7b18eeeeb4c1.zip
external_llvm-9bfa03c6fd8e02b738e0077fd1af7b18eeeeb4c1.tar.gz
external_llvm-9bfa03c6fd8e02b738e0077fd1af7b18eeeeb4c1.tar.bz2
Xform bitconvert(build_pair(load a, load b)) to a single load if the load locations are at the right offset from each other.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51008 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp49
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp2
-rw-r--r--lib/Target/X86/README-SSE.txt54
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp24
4 files changed, 55 insertions, 74 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 28f32d3..684b2f6 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -177,6 +177,7 @@ namespace {
SDOperand visitSIGN_EXTEND_INREG(SDNode *N);
SDOperand visitTRUNCATE(SDNode *N);
SDOperand visitBIT_CONVERT(SDNode *N);
+ SDOperand visitBUILD_PAIR(SDNode *N);
SDOperand visitFADD(SDNode *N);
SDOperand visitFSUB(SDNode *N);
SDOperand visitFMUL(SDNode *N);
@@ -217,6 +218,7 @@ namespace {
ISD::CondCode Cond, bool foldBooleans = true);
SDOperand SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
+ SDOperand CombineConsecutiveLoads(SDNode *N, MVT::ValueType VT);
SDOperand ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT::ValueType);
SDOperand BuildSDIV(SDNode *N);
SDOperand BuildUDIV(SDNode *N);
@@ -710,6 +712,7 @@ SDOperand DAGCombiner::visit(SDNode *N) {
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
@@ -3356,6 +3359,40 @@ SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) {
return ReduceLoadWidth(N);
}
+/// getBuildPairElt - Return the node that produces operand #i of N. If that
+/// operand is a MERGE_VALUES node, look through it to the node feeding the
+/// merged value selected by the operand's result number.
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+  SDOperand Op = N->getOperand(i);
+  if (Op.getOpcode() == ISD::MERGE_VALUES)
+    Op = Op.getOperand(Op.ResNo);
+  return Op.Val;
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
+///
+/// N must be a BUILD_PAIR node and VT is the type of the single load that
+/// would replace it. Returns the combined load, or a null SDOperand when the
+/// fold does not apply.
+SDOperand DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT::ValueType VT) {
+  assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+  // Both halves must be plain (non-extending) loads with no other users.
+  SDNode *LD1 = getBuildPairElt(N, 0);
+  if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+    return SDOperand();
+  SDNode *LD2 = getBuildPairElt(N, 1);
+  if (!ISD::isNON_EXTLoad(LD2) || !LD2->hasOneUse())
+    return SDOperand();
+
+  // Merging two volatile accesses into one would change the number and width
+  // of the memory operations performed, so leave volatile loads alone.
+  if (cast<LoadSDNode>(LD1)->isVolatile() ||
+      cast<LoadSDNode>(LD2)->isVolatile())
+    return SDOperand();
+
+  // BUILD_PAIR takes the low half of the value as operand 0 and the high half
+  // as operand 1. The wide load only yields the same value if the low half
+  // lives at the lower address on little-endian targets and at the higher
+  // address on big-endian targets, so pick the base load accordingly.
+  SDNode *LoAddrLD = LD1;
+  SDNode *HiAddrLD = LD2;
+  if (!TLI.isLittleEndian()) {
+    LoAddrLD = LD2;
+    HiAddrLD = LD1;
+  }
+
+  // HiAddrLD must load from exactly one half-width past LoAddrLD.
+  MVT::ValueType HalfVT = LoAddrLD->getValueType(0);
+  const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  if (!TLI.isConsecutiveLoad(HiAddrLD, LoAddrLD,
+                             MVT::getSizeInBits(HalfVT)/8, 1, MFI))
+    return SDOperand();
+
+  // Only form the wide load if it is legal and the existing alignment of the
+  // base load already satisfies the ABI alignment of the wide type.
+  LoadSDNode *LD = cast<LoadSDNode>(LoAddrLD);
+  unsigned Align = LD->getAlignment();
+  unsigned NewAlign = TLI.getTargetMachine().getTargetData()->
+    getABITypeAlignment(MVT::getTypeForValueType(VT));
+  if ((!AfterLegalize || TLI.isTypeLegal(VT)) &&
+      TLI.isOperationLegal(ISD::LOAD, VT) && NewAlign <= Align)
+    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(),
+                       LD->getSrcValue(), LD->getSrcValueOffset(),
+                       /*isVolatile=*/false, Align);
+  return SDOperand();
+}
+
SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) {
SDOperand N0 = N->getOperand(0);
MVT::ValueType VT = N->getValueType(0);
@@ -3463,10 +3500,22 @@ SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) {
return DAG.getNode(ISD::OR, VT, X, Cst);
}
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR) {
+ SDOperand CombineLD = CombineConsecutiveLoads(N0.Val, VT);
+ if (CombineLD.Val)
+ return CombineLD;
+ }
return SDOperand();
}
+/// visitBUILD_PAIR - Try to replace a BUILD_PAIR node with a single load of
+/// the pair's result type via CombineConsecutiveLoads. Returns a null
+/// SDOperand when no combine applies.
+SDOperand DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+  return CombineConsecutiveLoads(N, N->getValueType(0));
+}
+
/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
/// destination element value type.
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e57813d..408a5b2 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1514,7 +1514,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
/// location that the 'Base' load is loading from.
bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
unsigned Bytes, int Dist,
- MachineFrameInfo *MFI) const {
+ const MachineFrameInfo *MFI) const {
if (LD->getOperand(0).Val != Base->getOperand(0).Val)
return false;
MVT::ValueType VT = LD->getValueType(0);
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 1a5d904..34b949a 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -428,60 +428,6 @@ entry:
//===---------------------------------------------------------------------===//
-Consider (PR2108):
-
-#include <xmmintrin.h>
-__m128i doload64(unsigned long long x) { return _mm_loadl_epi64(&x);}
-__m128i doload64_2(unsigned long long *x) { return _mm_loadl_epi64(x);}
-
-These are very similar routines, but we generate significantly worse code for
-the first one on x86-32:
-
-_doload64:
- subl $12, %esp
- movl 20(%esp), %eax
- movl %eax, 4(%esp)
- movl 16(%esp), %eax
- movl %eax, (%esp)
- movsd (%esp), %xmm0
- addl $12, %esp
- ret
-_doload64_2:
- movl 4(%esp), %eax
- movsd (%eax), %xmm0
- ret
-
-The problem is that the argument lowering logic splits the i64 argument into
-2x i32 loads early, the f64 insert doesn't match. Here's a reduced testcase:
-
-define fastcc double @doload64(i64 %x) nounwind {
-entry:
- %tmp717 = bitcast i64 %x to double ; <double> [#uses=1]
- ret double %tmp717
-}
-
-compiles to:
-
-_doload64:
- subl $12, %esp
- movl 20(%esp), %eax
- movl %eax, 4(%esp)
- movl 16(%esp), %eax
- movl %eax, (%esp)
- movsd (%esp), %xmm0
- addl $12, %esp
- ret
-
-instead of movsd from the stack. This is actually not too bad to implement. The
-best way to do this is to implement a dag combine that turns
-bitconvert(build_pair(load a, load b)) into one load of the right type. The
-only trick to this is writing the predicate that determines that a/b are at the
-right offset from each other. For the enterprising hacker, InferAlignment is a
-helpful place to start poking if interested.
-
-
-//===---------------------------------------------------------------------===//
-
__m128d test1( __m128d A, __m128d B) {
return _mm_shuffle_pd(A, B, 0x3);
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5d50e36..806b626 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6285,13 +6285,7 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
LD->getAlignment());
}
-static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
- SDOperand Elt = N->getOperand(i);
- if (Elt.getOpcode() != ISD::MERGE_VALUES)
- return Elt.Val;
- return Elt.getOperand(Elt.ResNo).Val;
-}
-
+/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget,
const TargetLowering &TLI) {
@@ -6312,25 +6306,17 @@ static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
return SDOperand();
// Value must be a load.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDNode *Base = N->getOperand(0).Val;
if (!isa<LoadSDNode>(Base)) {
- if (Base->getOpcode() == ISD::BIT_CONVERT)
- Base = Base->getOperand(0).Val;
- if (Base->getOpcode() != ISD::BUILD_PAIR)
+ if (Base->getOpcode() != ISD::BIT_CONVERT)
return SDOperand();
- SDNode *Pair = Base;
- Base = getBuildPairElt(Pair, 0);
- if (!ISD::isNON_EXTLoad(Base))
- return SDOperand();
- SDNode *NextLD = getBuildPairElt(Pair, 1);
- if (!ISD::isNON_EXTLoad(NextLD) ||
- !TLI.isConsecutiveLoad(NextLD, Base, 4/*32 bits*/, 1, MFI))
+ Base = Base->getOperand(0).Val;
+ if (!isa<LoadSDNode>(Base))
return SDOperand();
}
- LoadSDNode *LD = cast<LoadSDNode>(Base);
// Transform it into VZEXT_LOAD addr.
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
}